├── .DS_Store ├── .gitignore ├── LICENSE ├── Notice.txt ├── README.md ├── docker ├── Apptainerfile.rocm ├── Dockerfile.awsefa ├── Dockerfile.ngc.vllm ├── Dockerfile.ngc.vllm0.8 ├── Dockerfile.ngc.vllm0.8.sagemaker ├── Dockerfile.rocm ├── Dockerfile.sglang ├── Dockerfile.vemlp.vllm.te ├── Dockerfile.vllm.sglang.megatron └── Dockerfile.vllm.sglang.megatron.deepseek ├── examples ├── .DS_Store ├── data_preprocess │ ├── aime2024_multiturn_w_tool.py │ ├── dapo_multiturn_w_tool.py │ ├── full_hh_rlhf.py │ ├── geo3k.py │ ├── gsm8k.py │ ├── gsm8k_multiturn_w_tool.py │ ├── hellaswag.py │ ├── math_dataset.py │ ├── multiturn.py │ └── preprocess_search_r1_dataset.py ├── generation │ ├── run_deepseek7b_mutli_node.sh │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── README.md │ ├── run_deepseek671b_math_megatron.sh │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_math.sh │ ├── run_deepseek7b_llm_math_megatron.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_moonlight16b_math_megatron.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_math.sh │ ├── run_qwen2-7b_math_megatron.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_seq_balance_math_megatron.sh │ ├── run_qwen2-7b_sgl_megatron.sh │ ├── run_qwen2_5-3b_gsm8k_grpo_lora.sh │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ ├── run_qwen2_5_vl-7b.sh │ ├── run_qwen3-236b_megatron.sh │ ├── run_qwen3-8b.sh │ └── run_qwen3moe-30b_megatron.sh ├── ppo_trainer │ ├── README.md │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_modelscope.sh │ ├── run_deepseek7b_llm_pfppo.sh │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_gemma.sh │ ├── run_moonlight16b_a3b_gsm8k_megatron.sh │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2-7b_sglang_seq_balance.sh │ └── run_qwen2.5-32b.sh ├── ray │ └── tutorial.ipynb ├── reinforce_plus_plus_trainer │ ├── run_qwen2-7b_math_rf.sh │ └── run_qwen2-7b_math_rf_baseline.sh ├── remax_trainer │ ├── run_qwen2.5-3b_seq_balance.sh │ └── run_qwen2.5-7b_seq_balance.sh ├── rloo_trainer │ └── run_qwen2-7b.sh ├── sft │ ├── gsm8k │ │ ├── run_deepseek_6b7.sh │ │ ├── run_gemma_2b.sh │ │ ├── run_gemma_7b.sh │ │ ├── run_qwen_05_peft.sh │ │ ├── run_qwen_05_sp2.sh │ │ └── run_qwen_05_sp2_liger.sh │ └── multiturn │ │ └── run_qwen_05_sp2.sh ├── sglang_multiturn │ ├── README.md │ ├── config │ │ ├── gsm8k_multiturn_grpo.yaml │ │ ├── gsm8k_multiturn_megatron_grpo.yaml │ │ ├── retool_multiturn_grpo.yaml │ │ ├── search_multiturn_grpo.yaml │ │ └── tool_config │ │ │ ├── gsm8k_tool_config.yaml │ │ │ ├── sandbox_fusion_tool_config.yaml │ │ │ └── search_tool_config.yaml │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ ├── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh │ ├── run_qwen2.5-3b_megatron_gsm8k_multiturn.sh │ └── search_r1_like │ │ ├── local_dense_retriever │ │ ├── download.py │ │ └── retrieval_server.py │ │ └── run_qwen2.5-3b_instruct_search_multiturn.sh ├── slurm │ └── ray_on_slurm.slurm ├── split_placement │ ├── README.md │ ├── config │ │ └── ppo_trainer_split.yaml │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── ssrl │ ├── .DS_Store │ ├── example.sh │ └── sim2real.sh └── tuning │ ├── 14b │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ ├── 32b │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ ├── 70b │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ └── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ └── 7b │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── figs ├── .DS_Store ├── browsecomp_task_part3.png ├── multihop_qa_tasks_part2.png ├── qa_tasks_part1.png ├── results.png ├── results_sim.png ├── teaser.jpg ├── teaser.pdf └── tts.png ├── llm_agent ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── generation.cpython-310.pyc │ └── tensor_helper.cpython-310.pyc ├── generation.py └── tensor_helper.py ├── pyproject.toml ├── recipe ├── README.md ├── dapo │ ├── README.md │ ├── __pycache__ │ │ ├── dapo_ray_trainer.cpython-310.pyc │ │ └── main_dapo.cpython-310.pyc │ ├── config │ │ └── dapo_trainer.yaml │ ├── dapo_ray_trainer.py │ ├── main_dapo.py │ ├── prepare_dapo_data.sh │ ├── run_dapo_early_qwen2.5_32b.sh │ ├── run_dapo_qwen2.5_32b.sh │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ ├── test_dapo_7b.sh │ ├── test_dapo_7b_math.sh │ ├── test_dapo_7b_math_megatron.sh │ ├── test_dapo_dspk_671b_megatron.sh │ └── test_dapo_qwen3_30b_math.sh ├── prime │ ├── __init__.py │ ├── config │ │ └── prime_trainer.yaml │ ├── main_prime.py │ ├── prime_core_algos.py │ ├── prime_dp_rm.py │ ├── prime_fsdp_workers.py │ ├── prime_ray_trainer.py │ ├── run_prime_qwen.sh │ └── run_prime_qwen_code.sh ├── r1 │ ├── README.md │ ├── __init__.py │ ├── config │ │ └── evaluation.yaml │ ├── data_process.py │ ├── main_eval.py │ ├── reward_score.py │ ├── run_r1_distill_qwen.sh │ └── tasks │ │ ├── __init__.py │ │ ├── gpqa.py │ │ ├── livecodebench.py │ │ └── math.py ├── retool │ └── run_sft.sh ├── spin │ ├── README.md │ ├── config │ │ └── spin_trainer.yaml │ ├── core_algos.py │ ├── dp_actor.py │ ├── fsdp_workers.py │ ├── main_spin.py │ ├── run_spin.sh │ └── spin_trainer.py └── sppo │ ├── README.md │ ├── __init__.py │ ├── config │ └── sppo_trainer.yaml │ ├── dp_actor.py │ ├── main_sppo.py │ ├── run_qwen2.5-7b_rm.sh │ ├── sppo_ray_trainer.py │ └── sppo_worker.py ├── requirements-npu.txt ├── requirements.txt ├── requirements_sglang.txt ├── scripts ├── converter_hf_to_mcore.py ├── diagnose.py ├── init_random_model.py ├── install_vllm_sglang_mcore.sh └── model_merger.py ├── setup.py ├── tests ├── __init__.py ├── distributed │ ├── run_all.sh │ └── test_tensor_dict.py ├── e2e │ ├── __init__.py │ ├── arithmetic_sequence │ │ ├── data │ │ │ └── create_dataset.py │ │ ├── model │ │ │ ├── config.json │ │ │ ├── create_model_tokenizer.py │ │ │ ├── generation_config.json │ │ │ ├── model.safetensors │ │ │ └── tokenizer_config.json │ │ └── rl │ │ │ ├── README.md │ │ │ └── main_trainer.py │ ├── check_custom_rwd_fn.py │ ├── check_results.py │ ├── envs │ │ ├── __init__.py │ │ └── digit_completion │ │ │ ├── __init__.py │ │ │ ├── task.py │ │ │ └── tokenizer.py │ ├── generation │ │ └── run_gen_qwen05.sh │ ├── ppo_trainer │ │ ├── expert_parallel │ │ │ └── qwen2moe_minimal.json │ │ ├── run_function_reward.sh │ │ └── run_model_reward.sh │ ├── run_dapo.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ ├── run_ppo_trainer_megatron.sh │ ├── run_prime.sh │ ├── run_r1_distill_qwen_aime24_eval.sh │ ├── run_ray_trainer.sh │ ├── run_ray_trainer_fire_sampling.sh │ ├── run_ray_trainer_rmpad.sh │ ├── run_spin.sh │ ├── run_sppo.sh │ ├── run_test.sh │ └── sft │ │ ├── run_sft.sh │ │ └── test_sp_loss_match.py ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── kernels │ └── test_linear_cross_entropy.py ├── kill_github_tests.sh ├── models │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── npu │ ├── run_qwen2_5_05b_dapo.sh │ ├── run_qwen2_5_05b_grpo.sh │ ├── run_qwen2_5_32b_grpo.sh │ └── run_qwen2_5_7b_grpo.sh ├── ray_cpu │ ├── check_worker_alive │ │ └── main.py │ ├── test_auto_padding.py │ ├── test_check_worker_alive.py │ ├── test_decorator.py │ ├── test_fused_workers.py │ ├── test_ray_local_envs.py │ └── test_ray_utils.py ├── ray_gpu │ ├── detached_worker │ │ ├── README.md │ │ ├── client.py │ │ ├── run.sh │ │ └── server.py │ ├── test_colocated_workers.py │ ├── test_colocated_workers_fused.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── reward_score │ └── test_sandbox_fusion.py ├── sandbox │ └── test_sandbox.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── single_controller │ └── base │ │ └── test_decorator.py ├── test_protocol.py ├── trainer │ ├── __init__.py │ └── ppo │ │ ├── __init__.py │ │ └── test_metric_utils.py ├── utils │ ├── cpu_tests │ │ ├── _test_module.py │ │ ├── test_fs.py │ │ ├── test_import_utils.py │ │ ├── test_model.py │ │ └── test_timeout_decorator.py │ └── gpu_tests │ │ ├── checkpoint │ │ └── test_fsdp_ckpt.py │ │ ├── dataset │ │ ├── test_multiturn_sft_dataset.py │ │ ├── test_rl_dataset.py │ │ ├── test_rm_dataset.py │ │ └── test_sft_dataset.py │ │ ├── megatron │ │ └── test_pipeline_parallel.py │ │ ├── test_activation_offload.py │ │ ├── test_flops_counter.py │ │ ├── test_seqlen_balancing.py │ │ └── test_torch_functional.py └── workers │ └── rollout │ ├── async_rollout_utils.py │ ├── resource │ └── tool_configs │ │ ├── sandbox_fusion_tool_config │ │ └── search_tool_config │ ├── run_fsdp_vllm.py │ ├── test_async_sglang_server.py │ ├── test_custom_completion_callback.py │ ├── test_hf_rollout.py │ ├── test_sglang_async_rollout_search_tools.py │ ├── test_sglang_async_rollout_sf_tools.py │ ├── test_sglang_async_rollout_w_tools.py │ ├── test_sglang_spmd.py │ ├── test_vllm_chat_scheduler.py │ ├── test_vllm_hf_loader.py │ ├── test_vllm_spmd.py │ └── utils_sglang.py └── verl ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc └── protocol.cpython-310.pyc ├── models ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── registry.cpython-310.pyc ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ ├── llama_loader_depracated.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── mcore │ ├── __init__.py │ ├── config_converter.py │ ├── loader.py │ ├── model_forward.py │ ├── model_initializer.py │ ├── patch_v012.py │ ├── readme.md │ ├── registry.py │ ├── saver.py │ ├── util.py │ └── weight_converter.py ├── qwen2 │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── qwen2_loader.py │ │ ├── qwen2_loader_depracated.py │ │ └── qwen2_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_qwen2_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── monkey_patch.cpython-310.pyc │ ├── kimi_vl.py │ ├── llama.py │ ├── monkey_patch.py │ ├── qwen2.py │ ├── qwen2_5_vl.py │ └── qwen2_vl.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── base │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── decorator.cpython-310.pyc │ │ ├── worker.cpython-310.pyc │ │ └── worker_group.cpython-310.pyc │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── ray.cpython-310.pyc │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── base.cpython-310.pyc │ ├── base.py │ └── megatron.py ├── third_party ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── sglang │ ├── __init__.py │ └── parallel_state.py └── vllm │ ├── __init__.py │ ├── __pycache__ │ └── __init__.cpython-310.pyc │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── tools ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base_tool.cpython-310.pyc │ └── schemas.cpython-310.pyc ├── base_tool.py ├── gsm8k_tool.py ├── sandbox_fusion_tools.py ├── schemas.py ├── search_tool.py └── utils │ ├── __init__.py │ └── search_r1_like_utils.py ├── trainer ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── main_ppo.cpython-310.pyc ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── core_algos.cpython-310.pyc │ │ ├── metric_utils.cpython-310.pyc │ │ ├── ray_trainer.cpython-310.pyc │ │ └── reward.cpython-310.pyc │ ├── core_algos.py │ ├── metric_utils.py │ ├── ray_trainer.py │ └── reward.py └── runtime_env.yaml ├── utils ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── activation_offload.cpython-310.pyc │ ├── device.cpython-310.pyc │ ├── flops_counter.cpython-310.pyc │ ├── fs.cpython-310.pyc │ ├── fsdp_utils.cpython-310.pyc │ ├── hdfs_io.cpython-310.pyc │ ├── import_utils.cpython-310.pyc │ ├── logging_utils.cpython-310.pyc │ ├── model.cpython-310.pyc │ ├── py_functional.cpython-310.pyc │ ├── ray_utils.cpython-310.pyc │ ├── seqlen_balancing.cpython-310.pyc │ ├── tokenizer.cpython-310.pyc │ ├── torch_dtypes.cpython-310.pyc │ ├── torch_functional.cpython-310.pyc │ ├── tracking.cpython-310.pyc │ ├── ulysses.cpython-310.pyc │ └── vllm_utils.cpython-310.pyc ├── activation_offload.py ├── checkpoint │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── checkpoint_manager.cpython-310.pyc │ │ └── fsdp_checkpoint_manager.cpython-310.pyc │ ├── checkpoint_manager.py │ ├── fsdp_checkpoint_manager.py │ └── megatron_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── rl_dataset.cpython-310.pyc │ │ ├── rm_dataset.cpython-310.pyc │ │ └── sft_dataset.cpython-310.pyc │ ├── multiturn_sft_dataset.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ ├── sft_dataset.py │ └── vision_utils.py ├── debug │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── performance.cpython-310.pyc │ ├── performance.py │ ├── profile.py │ └── trajectory_tracker.py ├── device.py ├── distributed.py ├── experimental │ ├── __init__.py │ └── torch_functional.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── generation │ └── __init__.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── aggregate_logger.cpython-310.pyc │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── metric │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ └── utils.py ├── model.py ├── net_utils.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── math_dapo.cpython-310.pyc │ ├── geo3k.py │ ├── gsm8k.py │ ├── math.py │ ├── math_batch.py │ ├── math_dapo.py │ ├── math_verify.py │ ├── prime_code │ │ ├── __init__.py │ │ ├── testing_util.py │ │ └── utils.py │ ├── prime_math │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── grader.cpython-310.pyc │ │ │ └── math_normalize.cpython-310.pyc │ │ ├── grader.py │ │ └── math_normalize.py │ ├── sandbox_fusion │ │ ├── __init__.py │ │ └── utils.py │ └── search_r1_like_qa_em.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py ├── ulysses.py └── vllm_utils.py ├── version └── version └── workers ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc └── fsdp_workers.cpython-310.pyc ├── actor ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base.cpython-310.pyc │ └── dp_actor.cpython-310.pyc ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base.cpython-310.pyc │ └── dp_critic.cpython-310.pyc ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_manager ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── batch.cpython-310.pyc │ ├── dapo.cpython-310.pyc │ ├── naive.cpython-310.pyc │ ├── naive_math.cpython-310.pyc │ └── prime.cpython-310.pyc ├── batch.py ├── dapo.py ├── em.py ├── naive.py └── prime.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── async_server.cpython-310.pyc │ ├── base.cpython-310.pyc │ ├── chat_scheduler.cpython-310.pyc │ └── hf_rollout.cpython-310.pyc ├── async_server.py ├── base.py ├── chat_scheduler.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── naive_rollout.cpython-310.pyc │ └── naive_rollout.py ├── schemas.py ├── sglang_rollout │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── sglang_rollout.cpython-310.pyc │ ├── async_sglang_server.py │ ├── sglang_rollout.py │ └── utils.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── vllm_rollout_spmd.cpython-310.pyc │ ├── fire_vllm_rollout.py │ ├── vllm_async_server.py │ ├── vllm_rollout.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── __pycache__ ├── __init__.cpython-310.pyc ├── base.cpython-310.pyc ├── fsdp_ulysses.cpython-310.pyc └── fsdp_vllm.cpython-310.pyc ├── base.py ├── fsdp_sglang.py ├── fsdp_ulysses.py ├── fsdp_vllm.py ├── megatron_sglang.py └── megatron_vllm.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/LICENSE -------------------------------------------------------------------------------- /Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/README.md -------------------------------------------------------------------------------- /docker/Apptainerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Apptainerfile.rocm -------------------------------------------------------------------------------- /docker/Dockerfile.awsefa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.awsefa -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.ngc.vllm -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm0.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.ngc.vllm0.8 -------------------------------------------------------------------------------- /docker/Dockerfile.ngc.vllm0.8.sagemaker: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.ngc.vllm0.8.sagemaker -------------------------------------------------------------------------------- /docker/Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.rocm -------------------------------------------------------------------------------- /docker/Dockerfile.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.sglang -------------------------------------------------------------------------------- /docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.vemlp.vllm.te -------------------------------------------------------------------------------- /docker/Dockerfile.vllm.sglang.megatron: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.vllm.sglang.megatron -------------------------------------------------------------------------------- /docker/Dockerfile.vllm.sglang.megatron.deepseek: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/docker/Dockerfile.vllm.sglang.megatron.deepseek -------------------------------------------------------------------------------- /examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/.DS_Store -------------------------------------------------------------------------------- /examples/data_preprocess/aime2024_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/aime2024_multiturn_w_tool.py -------------------------------------------------------------------------------- /examples/data_preprocess/dapo_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/dapo_multiturn_w_tool.py -------------------------------------------------------------------------------- /examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /examples/data_preprocess/gsm8k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/gsm8k_multiturn_w_tool.py -------------------------------------------------------------------------------- /examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /examples/data_preprocess/preprocess_search_r1_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/data_preprocess/preprocess_search_r1_dataset.py -------------------------------------------------------------------------------- /examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/generation/run_deepseek7b_mutli_node.sh -------------------------------------------------------------------------------- /examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/README.md -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek671b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_deepseek671b_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_deepseek7b_llm_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_moonlight16b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_moonlight16b_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_seq_balance_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b_seq_balance_math_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5-7b_math_megatron_diff_tp.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2_5-7b_math_megatron_diff_tp.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen2_5_vl-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen2_5_vl-7b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-236b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen3-236b_megatron.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen3-8b.sh -------------------------------------------------------------------------------- /examples/grpo_trainer/run_qwen3moe-30b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/grpo_trainer/run_qwen3moe-30b_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/README.md -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_moonlight16b_a3b_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_moonlight16b_a3b_gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh -------------------------------------------------------------------------------- /examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf.sh -------------------------------------------------------------------------------- /examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf_baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/reinforce_plus_plus_trainer/run_qwen2-7b_math_rf_baseline.sh -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh -------------------------------------------------------------------------------- /examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh -------------------------------------------------------------------------------- /examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/rloo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_deepseek_6b7.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_gemma_2b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_gemma_7b.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_qwen_05_peft.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh -------------------------------------------------------------------------------- /examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sft/multiturn/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/README.md -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/gsm8k_multiturn_grpo.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/gsm8k_multiturn_megatron_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/gsm8k_multiturn_megatron_grpo.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/retool_multiturn_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/retool_multiturn_grpo.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/search_multiturn_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/search_multiturn_grpo.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/tool_config/sandbox_fusion_tool_config.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/config/tool_config/search_tool_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/config/tool_config/search_tool_config.yaml -------------------------------------------------------------------------------- /examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/run_qwen2.5-3b_megatron_gsm8k_multiturn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/run_qwen2.5-3b_megatron_gsm8k_multiturn.sh -------------------------------------------------------------------------------- /examples/sglang_multiturn/search_r1_like/local_dense_retriever/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/search_r1_like/local_dense_retriever/download.py -------------------------------------------------------------------------------- /examples/sglang_multiturn/search_r1_like/local_dense_retriever/retrieval_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/search_r1_like/local_dense_retriever/retrieval_server.py -------------------------------------------------------------------------------- /examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/sglang_multiturn/search_r1_like/run_qwen2.5-3b_instruct_search_multiturn.sh -------------------------------------------------------------------------------- /examples/slurm/ray_on_slurm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/slurm/ray_on_slurm.slurm -------------------------------------------------------------------------------- /examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/split_placement/README.md -------------------------------------------------------------------------------- /examples/split_placement/config/ppo_trainer_split.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/split_placement/config/ppo_trainer_split.yaml -------------------------------------------------------------------------------- /examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /examples/ssrl/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ssrl/.DS_Store -------------------------------------------------------------------------------- /examples/ssrl/example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ssrl/example.sh -------------------------------------------------------------------------------- /examples/ssrl/sim2real.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/ssrl/sim2real.sh -------------------------------------------------------------------------------- /examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/32b/qwen2_32B_grpo_8_h20_megatron_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/tuning/32b/qwen2_32B_grpo_8_h20_megatron_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /figs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/.DS_Store -------------------------------------------------------------------------------- /figs/browsecomp_task_part3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/browsecomp_task_part3.png -------------------------------------------------------------------------------- /figs/multihop_qa_tasks_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/multihop_qa_tasks_part2.png -------------------------------------------------------------------------------- /figs/qa_tasks_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/qa_tasks_part1.png -------------------------------------------------------------------------------- /figs/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/results.png -------------------------------------------------------------------------------- /figs/results_sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/results_sim.png -------------------------------------------------------------------------------- /figs/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/teaser.jpg -------------------------------------------------------------------------------- /figs/teaser.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/teaser.pdf -------------------------------------------------------------------------------- /figs/tts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/figs/tts.png -------------------------------------------------------------------------------- /llm_agent/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/llm_agent/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /llm_agent/__pycache__/generation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/llm_agent/__pycache__/generation.cpython-310.pyc -------------------------------------------------------------------------------- /llm_agent/__pycache__/tensor_helper.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/llm_agent/__pycache__/tensor_helper.cpython-310.pyc -------------------------------------------------------------------------------- /llm_agent/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/llm_agent/generation.py -------------------------------------------------------------------------------- /llm_agent/tensor_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/llm_agent/tensor_helper.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/pyproject.toml -------------------------------------------------------------------------------- /recipe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/README.md -------------------------------------------------------------------------------- /recipe/dapo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/README.md -------------------------------------------------------------------------------- /recipe/dapo/__pycache__/dapo_ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/__pycache__/dapo_ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /recipe/dapo/__pycache__/main_dapo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/__pycache__/main_dapo.cpython-310.pyc -------------------------------------------------------------------------------- /recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/config/dapo_trainer.yaml -------------------------------------------------------------------------------- /recipe/dapo/dapo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/dapo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/dapo/main_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/main_dapo.py -------------------------------------------------------------------------------- /recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/prepare_dapo_data.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_early_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/run_dapo_early_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/run_dapo_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/test_dapo_7b.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/test_dapo_7b_math.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/test_dapo_7b_math_megatron.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_dspk_671b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/test_dapo_dspk_671b_megatron.sh -------------------------------------------------------------------------------- /recipe/dapo/test_dapo_qwen3_30b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/dapo/test_dapo_qwen3_30b_math.sh -------------------------------------------------------------------------------- /recipe/prime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/__init__.py -------------------------------------------------------------------------------- /recipe/prime/config/prime_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/config/prime_trainer.yaml -------------------------------------------------------------------------------- /recipe/prime/main_prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/main_prime.py -------------------------------------------------------------------------------- /recipe/prime/prime_core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/prime_core_algos.py -------------------------------------------------------------------------------- /recipe/prime/prime_dp_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/prime_dp_rm.py -------------------------------------------------------------------------------- /recipe/prime/prime_fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/prime_fsdp_workers.py -------------------------------------------------------------------------------- /recipe/prime/prime_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/prime_ray_trainer.py -------------------------------------------------------------------------------- /recipe/prime/run_prime_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/run_prime_qwen.sh -------------------------------------------------------------------------------- /recipe/prime/run_prime_qwen_code.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/prime/run_prime_qwen_code.sh -------------------------------------------------------------------------------- /recipe/r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/README.md -------------------------------------------------------------------------------- /recipe/r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/__init__.py -------------------------------------------------------------------------------- /recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/config/evaluation.yaml -------------------------------------------------------------------------------- /recipe/r1/data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/data_process.py -------------------------------------------------------------------------------- /recipe/r1/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/main_eval.py -------------------------------------------------------------------------------- /recipe/r1/reward_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/reward_score.py -------------------------------------------------------------------------------- /recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/run_r1_distill_qwen.sh -------------------------------------------------------------------------------- /recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/tasks/__init__.py -------------------------------------------------------------------------------- /recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/tasks/gpqa.py -------------------------------------------------------------------------------- /recipe/r1/tasks/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/tasks/livecodebench.py -------------------------------------------------------------------------------- /recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/r1/tasks/math.py -------------------------------------------------------------------------------- /recipe/retool/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/retool/run_sft.sh -------------------------------------------------------------------------------- /recipe/spin/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/README.md -------------------------------------------------------------------------------- /recipe/spin/config/spin_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/config/spin_trainer.yaml -------------------------------------------------------------------------------- /recipe/spin/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/core_algos.py -------------------------------------------------------------------------------- /recipe/spin/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/dp_actor.py -------------------------------------------------------------------------------- /recipe/spin/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/fsdp_workers.py -------------------------------------------------------------------------------- /recipe/spin/main_spin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/main_spin.py -------------------------------------------------------------------------------- /recipe/spin/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/run_spin.sh -------------------------------------------------------------------------------- /recipe/spin/spin_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/spin/spin_trainer.py -------------------------------------------------------------------------------- /recipe/sppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/README.md -------------------------------------------------------------------------------- /recipe/sppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/__init__.py -------------------------------------------------------------------------------- /recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/config/sppo_trainer.yaml -------------------------------------------------------------------------------- /recipe/sppo/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/dp_actor.py -------------------------------------------------------------------------------- /recipe/sppo/main_sppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/main_sppo.py -------------------------------------------------------------------------------- /recipe/sppo/run_qwen2.5-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/run_qwen2.5-7b_rm.sh -------------------------------------------------------------------------------- /recipe/sppo/sppo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/sppo_ray_trainer.py -------------------------------------------------------------------------------- /recipe/sppo/sppo_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/recipe/sppo/sppo_worker.py -------------------------------------------------------------------------------- /requirements-npu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/requirements-npu.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/requirements.txt -------------------------------------------------------------------------------- /requirements_sglang.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/requirements_sglang.txt -------------------------------------------------------------------------------- /scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/scripts/diagnose.py -------------------------------------------------------------------------------- /scripts/init_random_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/scripts/init_random_model.py -------------------------------------------------------------------------------- /scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/scripts/install_vllm_sglang_mcore.sh -------------------------------------------------------------------------------- /scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/scripts/model_merger.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/distributed/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/distributed/run_all.sh -------------------------------------------------------------------------------- /tests/distributed/test_tensor_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/__init__.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/data/create_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/data/create_dataset.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/model/config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/model/generation_config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/model.safetensors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/model/model.safetensors -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/model/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/model/tokenizer_config.json -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/rl/README.md -------------------------------------------------------------------------------- /tests/e2e/arithmetic_sequence/rl/main_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/arithmetic_sequence/rl/main_trainer.py -------------------------------------------------------------------------------- /tests/e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/check_custom_rwd_fn.py -------------------------------------------------------------------------------- /tests/e2e/check_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/check_results.py -------------------------------------------------------------------------------- /tests/e2e/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/envs/__init__.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/envs/digit_completion/__init__.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/envs/digit_completion/task.py -------------------------------------------------------------------------------- /tests/e2e/envs/digit_completion/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/envs/digit_completion/tokenizer.py -------------------------------------------------------------------------------- /tests/e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/generation/run_gen_qwen05.sh -------------------------------------------------------------------------------- /tests/e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json -------------------------------------------------------------------------------- /tests/e2e/ppo_trainer/run_function_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/ppo_trainer/run_function_reward.sh -------------------------------------------------------------------------------- /tests/e2e/ppo_trainer/run_model_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/ppo_trainer/run_model_reward.sh -------------------------------------------------------------------------------- /tests/e2e/run_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_dapo.sh -------------------------------------------------------------------------------- /tests/e2e/run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh -------------------------------------------------------------------------------- /tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh -------------------------------------------------------------------------------- /tests/e2e/run_ppo_trainer_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_ppo_trainer_megatron.sh -------------------------------------------------------------------------------- /tests/e2e/run_prime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_prime.sh -------------------------------------------------------------------------------- /tests/e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_r1_distill_qwen_aime24_eval.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_ray_trainer.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_fire_sampling.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_ray_trainer_fire_sampling.sh -------------------------------------------------------------------------------- /tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_ray_trainer_rmpad.sh -------------------------------------------------------------------------------- /tests/e2e/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_spin.sh -------------------------------------------------------------------------------- /tests/e2e/run_sppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_sppo.sh -------------------------------------------------------------------------------- /tests/e2e/run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/run_test.sh -------------------------------------------------------------------------------- /tests/e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/sft/run_sft.sh -------------------------------------------------------------------------------- /tests/e2e/sft/test_sp_loss_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/e2e/sft/test_sp_loss_match.py -------------------------------------------------------------------------------- /tests/gpu_utility/test_memory_buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/gpu_utility/test_memory_buffers.py -------------------------------------------------------------------------------- /tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/gpu_utility/test_ops.py -------------------------------------------------------------------------------- /tests/gpu_utility/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/gpu_utility/test_torch_functional.py -------------------------------------------------------------------------------- /tests/kernels/test_linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/kernels/test_linear_cross_entropy.py -------------------------------------------------------------------------------- /tests/kill_github_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/kill_github_tests.sh -------------------------------------------------------------------------------- /tests/models/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/models/test_transformer.py -------------------------------------------------------------------------------- /tests/models/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/models/test_transformers_ulysses.py -------------------------------------------------------------------------------- /tests/npu/run_qwen2_5_05b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/npu/run_qwen2_5_05b_dapo.sh -------------------------------------------------------------------------------- /tests/npu/run_qwen2_5_05b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/npu/run_qwen2_5_05b_grpo.sh -------------------------------------------------------------------------------- /tests/npu/run_qwen2_5_32b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/npu/run_qwen2_5_32b_grpo.sh -------------------------------------------------------------------------------- /tests/npu/run_qwen2_5_7b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/npu/run_qwen2_5_7b_grpo.sh -------------------------------------------------------------------------------- /tests/ray_cpu/check_worker_alive/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/check_worker_alive/main.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_auto_padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_auto_padding.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_check_worker_alive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_check_worker_alive.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_decorator.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_fused_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_fused_workers.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_ray_local_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_ray_local_envs.py -------------------------------------------------------------------------------- /tests/ray_cpu/test_ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_cpu/test_ray_utils.py -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/detached_worker/README.md -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/detached_worker/client.py -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/detached_worker/run.sh -------------------------------------------------------------------------------- /tests/ray_gpu/detached_worker/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/detached_worker/server.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_colocated_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_colocated_workers.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_colocated_workers_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_colocated_workers_fused.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_data_transfer.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_driverfunc_to_worker.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_high_level_scheduling_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_high_level_scheduling_api.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_rvdz.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_worker_group_basics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_worker_group_basics.py -------------------------------------------------------------------------------- /tests/ray_gpu/test_worker_group_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/ray_gpu/test_worker_group_torch.py -------------------------------------------------------------------------------- /tests/reward_score/test_sandbox_fusion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/reward_score/test_sandbox_fusion.py -------------------------------------------------------------------------------- /tests/sandbox/test_sandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/sandbox/test_sandbox.py -------------------------------------------------------------------------------- /tests/sanity/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/sanity/check_license.py -------------------------------------------------------------------------------- /tests/sanity/test_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/sanity/test_import.py -------------------------------------------------------------------------------- /tests/single_controller/base/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/single_controller/base/test_decorator.py -------------------------------------------------------------------------------- /tests/test_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/test_protocol.py -------------------------------------------------------------------------------- /tests/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/trainer/__init__.py -------------------------------------------------------------------------------- /tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /tests/trainer/ppo/test_metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/trainer/ppo/test_metric_utils.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/_test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/cpu_tests/_test_module.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/cpu_tests/test_fs.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/cpu_tests/test_import_utils.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/cpu_tests/test_model.py -------------------------------------------------------------------------------- /tests/utils/cpu_tests/test_timeout_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/cpu_tests/test_timeout_decorator.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/checkpoint/test_fsdp_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/checkpoint/test_fsdp_ckpt.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/dataset/test_multiturn_sft_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/dataset/test_rl_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/dataset/test_rm_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/dataset/test_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/dataset/test_sft_dataset.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/megatron/test_pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/megatron/test_pipeline_parallel.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/test_activation_offload.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/test_flops_counter.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/test_seqlen_balancing.py -------------------------------------------------------------------------------- /tests/utils/gpu_tests/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/utils/gpu_tests/test_torch_functional.py -------------------------------------------------------------------------------- /tests/workers/rollout/async_rollout_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/async_rollout_utils.py -------------------------------------------------------------------------------- /tests/workers/rollout/resource/tool_configs/sandbox_fusion_tool_config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/resource/tool_configs/sandbox_fusion_tool_config -------------------------------------------------------------------------------- /tests/workers/rollout/resource/tool_configs/search_tool_config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/resource/tool_configs/search_tool_config -------------------------------------------------------------------------------- /tests/workers/rollout/run_fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/run_fsdp_vllm.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_async_sglang_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_async_sglang_server.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_custom_completion_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_custom_completion_callback.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_hf_rollout.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_async_rollout_search_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_sglang_async_rollout_search_tools.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_async_rollout_sf_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_sglang_async_rollout_sf_tools.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_async_rollout_w_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_sglang_async_rollout_w_tools.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_sglang_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_sglang_spmd.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_chat_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_vllm_chat_scheduler.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_hf_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_vllm_hf_loader.py -------------------------------------------------------------------------------- /tests/workers/rollout/test_vllm_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/test_vllm_spmd.py -------------------------------------------------------------------------------- /tests/workers/rollout/utils_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/tests/workers/rollout/utils_sglang.py -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/__init__.py -------------------------------------------------------------------------------- /verl/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/__pycache__/protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/__pycache__/protocol.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/README.md -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/__init__.py -------------------------------------------------------------------------------- /verl/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_loader.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/checkpoint_utils/llama_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_saver.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /verl/models/mcore/patch_v012.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/patch_v012.py -------------------------------------------------------------------------------- /verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/util.py -------------------------------------------------------------------------------- /verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/registry.py -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /verl/models/transformers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/__pycache__/monkey_patch.cpython-310.pyc -------------------------------------------------------------------------------- /verl/models/transformers/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/kimi_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/qwen2_5_vl.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/protocol.py -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/decorator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/__pycache__/decorator.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/__pycache__/worker.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/__pycache__/worker_group.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/register_center/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/register_center/__pycache__/ray.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/ray/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/__init__.py -------------------------------------------------------------------------------- /verl/third_party/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/__init__.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/config.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/llm.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_5_4/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_5_4/worker.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/__init__.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/config.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/llm.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py -------------------------------------------------------------------------------- /verl/third_party/vllm/vllm_v_0_6_3/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/third_party/vllm/vllm_v_0_6_3/worker.py -------------------------------------------------------------------------------- /verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/__init__.py -------------------------------------------------------------------------------- /verl/tools/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/base_tool.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/__pycache__/base_tool.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/__pycache__/schemas.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/__pycache__/schemas.cpython-310.pyc -------------------------------------------------------------------------------- /verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/base_tool.py -------------------------------------------------------------------------------- /verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /verl/tools/sandbox_fusion_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/sandbox_fusion_tools.py -------------------------------------------------------------------------------- /verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/schemas.py -------------------------------------------------------------------------------- /verl/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/search_tool.py -------------------------------------------------------------------------------- /verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/utils/__init__.py -------------------------------------------------------------------------------- /verl/tools/utils/search_r1_like_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/tools/utils/search_r1_like_utils.py -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/__init__.py -------------------------------------------------------------------------------- /verl/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/__pycache__/main_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/__pycache__/main_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__pycache__/metric_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/__pycache__/reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc -------------------------------------------------------------------------------- /verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__init__.py -------------------------------------------------------------------------------- /verl/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/activation_offload.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/activation_offload.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/device.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/device.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/flops_counter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/flops_counter.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/fs.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/fsdp_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/hdfs_io.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/hdfs_io.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/py_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/py_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ray_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/ray_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_dtypes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/torch_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/torch_functional.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/tracking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/tracking.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/__pycache__/vllm_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/__pycache__/vllm_utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/activation_offload.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/__pycache__/checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/__pycache__/fsdp_checkpoint_manager.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/megatron_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/checkpoint/megatron_checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/config.py -------------------------------------------------------------------------------- /verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/__pycache__/rl_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/__pycache__/rm_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/debug/__pycache__/performance.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/__pycache__/performance.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /verl/utils/debug/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/profile.py -------------------------------------------------------------------------------- /verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /verl/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/device.py -------------------------------------------------------------------------------- /verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/distributed.py -------------------------------------------------------------------------------- /verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/fs.py -------------------------------------------------------------------------------- /verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /verl/utils/generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/generation/__init__.py -------------------------------------------------------------------------------- /verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/import_utils.py -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/logger/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/logger/__pycache__/aggregate_logger.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /verl/utils/metric/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/metric/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/metric/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/metric/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/model.py -------------------------------------------------------------------------------- /verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/net_utils.py -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/py_functional.py -------------------------------------------------------------------------------- /verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/__pycache__/math_dapo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/__pycache__/math_dapo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__pycache__/grader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/__pycache__/grader.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/__pycache__/math_normalize.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/__pycache__/math_normalize.cpython-310.pyc -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /verl/utils/reward_score/sandbox_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/sandbox_fusion/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/sandbox_fusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/sandbox_fusion/utils.py -------------------------------------------------------------------------------- /verl/utils/reward_score/search_r1_like_qa_em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/reward_score/search_r1_like_qa_em.py -------------------------------------------------------------------------------- /verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/tracking.py -------------------------------------------------------------------------------- /verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/ulysses.py -------------------------------------------------------------------------------- /verl/utils/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/utils/vllm_utils.py -------------------------------------------------------------------------------- /verl/version/version: -------------------------------------------------------------------------------- 1 | 0.4.0.dev 2 | -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/__init__.py -------------------------------------------------------------------------------- /verl/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/base.py -------------------------------------------------------------------------------- /verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/__pycache__/dp_critic.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/base.py -------------------------------------------------------------------------------- /verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/batch.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/dapo.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/naive.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/naive_math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/naive_math.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/__pycache__/prime.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/em.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/async_server.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__pycache__/async_server.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/chat_scheduler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__pycache__/chat_scheduler.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/__pycache__/hf_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/async_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /verl/workers/rollout/chat_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/chat_scheduler.py -------------------------------------------------------------------------------- /verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/naive/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/naive/__pycache__/naive_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/__pycache__/sglang_rollout.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/async_sglang_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/async_sglang_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/sglang_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/sglang_rollout/utils.py -------------------------------------------------------------------------------- /verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/__pycache__/vllm_rollout_spmd.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/vllm_rollout.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/fsdp_sglang.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/megatron_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/megatron_sglang.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/SSRL/HEAD/verl/workers/sharding_manager/megatron_vllm.py --------------------------------------------------------------------------------