├── Co-rewarding-I ├── data │ └── math │ │ ├── test.parquet │ │ ├── train.parquet │ │ ├── train_original.parquet │ │ ├── train_pairs.parquet │ │ ├── train_rewrite_Qwen3-32B.jsonl │ │ ├── train_rewrite_Qwen3-32B.parquet │ │ └── upload_file.py ├── examples │ └── data_preprocess │ │ ├── full_hh_rlhf.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── gsm8k_dataset.py │ │ ├── gsm8k_multiturn_w_tool.py │ │ ├── hellaswag.py │ │ ├── math_dataset.py │ │ └── multiturn.py ├── pyproject.toml ├── rewrite_questions.py ├── run_corewarding-I.sh ├── scripts │ ├── converter_hf_to_mcore.py │ ├── diagnose.py │ ├── install_env.sh │ └── model_merger.py ├── setup.py └── verl │ ├── __init__.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── model_forward.py │ │ ├── model_initializer.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py │ ├── protocol.py │ ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py │ ├── third_party │ ├── __init__.py │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ └── vllm │ │ ├── __init__.py │ │ ├── vllm_v_0_5_4 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ │ └── vllm_v_0_6_3 │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── dtensor_weight_loaders.py │ │ ├── hf_weight_loader.py │ │ ├── llm.py │ │ ├── llm_engine_sp.py │ │ ├── megatron_weight_loaders.py │ │ ├── model_loader.py │ │ ├── model_runner.py │ │ ├── parallel_state.py │ │ ├── spmd_gpu_executor.py │ │ ├── tokenizer.py │ │ └── worker.py │ ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── gsm8k_tool.py │ └── schemas.py │ ├── trainer │ ├── __init__.py │ ├── config │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ └── sft_trainer.yaml │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── runtime_env.yaml │ ├── utils │ ├── __init__.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── multiturn_sft_dataset.py │ │ ├── pair_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── performance.py │ │ ├── profile.py │ │ └── trajectory_tracker.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── metric │ │ ├── __init__.py │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ └── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── ulysses.py │ └── vllm_utils.py │ ├── version │ └── version │ └── workers │ ├── __init__.py │ ├── actor │ ├── __init__.py │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── batch.py │ ├── co_reward.py │ ├── dapo.py │ ├── naive.py │ └── prime.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── async_server.py │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_rollout.py │ │ └── sglang_rollout.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── fire_vllm_rollout.py │ │ ├── vllm_async_server.py │ │ ├── vllm_rollout.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── megatron_sglang.py │ └── megatron_vllm.py ├── Co-rewarding-II ├── README.md ├── data │ ├── AMC-TTT │ │ ├── test.parquet │ │ ├── test_original.parquet │ │ ├── test_rewrite_Qwen3-32B.jsonl │ │ └── test_rewrite_Qwen3-32B.parquet │ ├── dapo │ │ ├── train.parquet │ │ ├── train_original.parquet │ │ ├── train_pairs.parquet │ │ ├── train_rewrite_Qwen3-32B.jsonl │ │ ├── train_rewrite_Qwen3-32B.parquet │ │ └── upload_file.py │ ├── math │ │ ├── test.parquet │ │ ├── train.parquet │ │ ├── train_original.parquet │ │ ├── train_pairs.parquet │ │ ├── train_rewrite_Qwen3-32B.jsonl │ │ ├── train_rewrite_Qwen3-32B.parquet │ │ └── upload_file.py │ └── open-rs │ │ ├── train.parquet │ │ ├── train_original.parquet │ │ ├── train_pairs.parquet │ │ ├── train_rewrite_Qwen3-32B.jsonl │ │ ├── train_rewrite_Qwen3-32B.parquet │ │ └── upload_file.py ├── examples │ ├── data_preprocess │ │ ├── aime2024_multiturn_w_tool.py │ │ ├── amc.py │ │ ├── dapo17ken_dataset.py │ │ ├── dapo_multiturn_w_tool.py │ │ ├── full_hh_rlhf.py │ │ ├── geo3k.py │ │ ├── geo3k_multiturn_w_tool.py │ │ ├── gsm8k.py │ │ ├── gsm8k_multiturn_w_interaction.py │ │ ├── gsm8k_multiturn_w_tool.py │ │ ├── gsm8k_tool_agent_loop.py │ │ ├── hellaswag.py │ │ ├── math_dataset.py │ │ ├── multiturn.py │ │ ├── open-rs.py │ │ └── preprocess_search_r1_dataset.py │ ├── generation │ │ ├── run_deepseek7b_mutli_node.sh │ │ └── run_deepseek_v2_lite_math.sh │ ├── gmpo_trainer │ │ ├── README.md │ │ ├── run_qwen2_5-7b_math.sh │ │ ├── test_dapo_7b_math.sh │ │ └── test_dapo_qwen3_30b_math.sh │ ├── gpg_trainer │ │ ├── gpg.md │ │ ├── run_qwen2-7b_math.sh │ │ └── run_qwen2-7b_math_megatron.sh │ ├── grpo_trainer │ │ ├── README.md │ │ ├── run_deepseek671b_math_megatron_80gb.sh │ │ ├── run_deepseek671b_math_megatron_96gb.sh │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_math.sh │ │ ├── run_deepseek7b_llm_math_megatron.sh │ │ ├── run_deepseek7b_llm_seq_balance.sh │ │ ├── run_minicpmo2_6.sh │ │ ├── run_moonlight16b_math_megatron.sh │ │ ├── run_qwen2-7b.sh │ │ ├── run_qwen2-7b_math.sh │ │ ├── run_qwen2-7b_math_megatron.sh │ │ ├── run_qwen2-7b_seq_balance.sh │ │ ├── run_qwen2-7b_seq_balance_math_megatron.sh │ │ ├── run_qwen2-7b_sgl_megatron.sh │ │ ├── run_qwen2_5-3b_gsm8k_grpo_lora.sh │ │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ │ ├── run_qwen2_5_32b_grpo_npu.sh │ │ ├── run_qwen2_5_7b_grpo_discrete_prof_npu.sh │ │ ├── run_qwen2_5_7b_grpo_e2e_prof_npu.sh │ │ ├── run_qwen2_5_7b_grpo_npu.sh │ │ ├── run_qwen2_5_vl-7b-megatron.sh │ │ ├── run_qwen2_5_vl-7b-sglang.sh │ │ ├── run_qwen2_5_vl-7b.sh │ │ ├── run_qwen2_5_vl-7b_lora.sh │ │ ├── run_qwen2_5_vl-7b_seq_balance.sh │ │ ├── run_qwen2_5_vl_32b_npu.sh │ │ ├── run_qwen2_5_vl_3b_npu.sh │ │ ├── run_qwen2_5_vl_7b_npu.sh │ │ ├── run_qwen3-235b_megatron_96gb.sh │ │ ├── run_qwen3-8b.sh │ │ └── run_qwen3moe-30b_megatron_96gb.sh │ ├── ppo_trainer │ │ ├── README.md │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_modelscope.sh │ │ ├── run_deepseek7b_llm_pfppo.sh │ │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ │ ├── run_deepseek7b_llm_sp2.sh │ │ ├── run_deepseek_full_hh_rlhf.sh │ │ ├── run_deepseek_math_gsm8k_megatron.sh │ │ ├── run_deepseek_math_gsm8k_megatron_nsys.sh │ │ ├── run_gemma.sh │ │ ├── run_moonlight16b_a3b_gsm8k_megatron.sh │ │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ │ ├── run_qwen2-7b_rm.sh │ │ ├── run_qwen2-7b_rm_seq_balance.sh │ │ ├── run_qwen2-7b_rm_seq_balance_fused_kernels.sh │ │ ├── run_qwen2-7b_rm_seq_balance_nsys.sh │ │ ├── run_qwen2-7b_seq_balance.sh │ │ ├── run_qwen2-7b_sglang_seq_balance.sh │ │ └── run_qwen2.5-32b.sh │ ├── ray │ │ └── tutorial.ipynb │ ├── reinforce_plus_plus_trainer │ │ ├── run_qwen2-7b_math_rf.sh │ │ └── run_qwen2-7b_math_rf_baseline.sh │ ├── remax_trainer │ │ ├── run_qwen2.5-3b_seq_balance.sh │ │ └── run_qwen2.5-7b_seq_balance.sh │ ├── rloo_trainer │ │ └── run_qwen2-7b.sh │ ├── sft │ │ ├── gsm8k │ │ │ ├── run_deepseek_6b7.sh │ │ │ ├── run_gemma_2b.sh │ │ │ ├── run_gemma_7b.sh │ │ │ ├── run_qwen3_8b_sft_peft_sp2_npu.sh │ │ │ ├── run_qwen_05_peft.sh │ │ │ ├── run_qwen_05_sp2.sh │ │ │ └── run_qwen_05_sp2_liger.sh │ │ └── multiturn │ │ │ └── run_qwen_05_sp2.sh │ ├── sglang_multiturn │ │ ├── README.md │ │ ├── config │ │ │ ├── geo3k_multiturn_grpo.yaml │ │ │ ├── geo3k_multiturn_megatron_grpo.yaml │ │ │ ├── gsm8k_multiturn_grpo.yaml │ │ │ ├── gsm8k_multiturn_grpo_w_interaction.yaml │ │ │ ├── gsm8k_multiturn_megatron_grpo.yaml │ │ │ ├── interaction_config │ │ │ │ └── gsm8k_interaction_config.yaml │ │ │ ├── retool_multiturn_grpo.yaml │ │ │ ├── search_multiturn_grpo.yaml │ │ │ └── tool_config │ │ │ │ ├── geo3k_tool_config.yaml │ │ │ │ ├── gsm8k_tool_config.yaml │ │ │ │ ├── mcp_server.json │ │ │ │ ├── mcp_tool_config.yaml │ │ │ │ ├── sandbox_fusion_tool_config.yaml │ │ │ │ └── search_tool_config.yaml │ │ ├── geo3k │ │ │ ├── run_qwen2.5-3b_geo3k_multiturn.sh │ │ │ ├── run_qwen2.5-3b_geo3k_multiturn_4xgpu.sh │ │ │ └── run_qwen2.5-3b_megatron_geo3k_multiturn.sh │ │ ├── run_qwen0.5b_gsm8k_multiturn_curriculum.sh │ │ ├── run_qwen2.5-0.5b_gsm8k_multiturn_w_interaction.sh │ │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ │ ├── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh │ │ ├── run_qwen2.5-3b_gsm8k_tool_agent_mlflow.sh │ │ ├── run_qwen2.5-3b_megatron_gsm8k_multiturn.sh │ │ ├── run_qwen3-4b_gsm8k_multiturn.sh │ │ ├── run_qwen3_4b_dapo_multiturn.sh │ │ └── search_r1_like │ │ │ ├── local_dense_retriever │ │ │ ├── download.py │ │ │ └── retrieval_server.py │ │ │ └── run_qwen2.5-3b_instruct_search_multiturn.sh │ ├── slurm │ │ └── ray_on_slurm.slurm │ ├── split_placement │ │ ├── README.md │ │ ├── config │ │ │ └── ppo_trainer_split.yaml │ │ ├── main_ppo_split.py │ │ ├── run_deepseek7b_llm.sh │ │ └── split_monkey_patch.py │ └── tuning │ │ ├── 0.5b │ │ └── qwen2-0.5b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 1.5b │ │ └── qwen2-1.5b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 14b │ │ ├── qwen2-14b_grpo-lora_2_h100_fsdp_vllm.sh │ │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ │ ├── 32b │ │ ├── qwen2-32b_grpo-lora_4_h100_fsdp_vllm.sh │ │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ │ ├── 3b │ │ └── qwen2-3b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 70b │ │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ │ ├── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ │ └── qwen2-72b_grpo-lora_8_h100_fsdp_vllm.sh │ │ └── 7b │ │ ├── qwen2-7b_grpo-lora_1_h100_fsdp_vllm.sh │ │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── requirements.txt ├── run_corewarding-II.sh ├── scripts │ ├── __init__.py │ ├── converter_hf_to_mcore.py │ ├── diagnose.py │ ├── generate_trainer_config.sh │ ├── init_random_model.py │ ├── install_vllm_sglang_mcore.sh │ ├── legacy_model_merger.py │ ├── model_merger.py │ ├── print_cfg.py │ └── rollout_viewer.py ├── setup.py └── verl │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base_config.cpython-310.pyc │ └── protocol.cpython-310.pyc │ ├── base_config.py │ ├── experimental │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── agent_loop │ │ ├── __init__.py │ │ ├── agent_loop.py │ │ ├── single_turn_agent_loop.py │ │ ├── tool_agent_loop.py │ │ └── tool_parser.py │ ├── dataset │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── sampler.cpython-310.pyc │ │ └── sampler.py │ └── dynamic_dataset │ │ ├── __init__.py │ │ └── dynamicgen_dataset.py │ ├── interactions │ ├── __init__.py │ ├── base.py │ ├── gsm8k_interaction.py │ └── utils │ │ ├── __init__.py │ │ └── interaction_registry.py │ ├── model_merger │ ├── __init__.py │ ├── __main__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __main__.cpython-310.pyc │ │ ├── base_model_merger.cpython-310.pyc │ │ └── fsdp_model_merger.cpython-310.pyc │ ├── base_model_merger.py │ ├── fsdp_model_merger.py │ └── megatron_model_merger.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── registry.cpython-310.pyc │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── mbridge.py │ │ ├── model_forward.py │ │ ├── model_forward_fused.py │ │ ├── model_initializer.py │ │ ├── patch_v012.py │ │ ├── qwen2_5_vl │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── model.py │ │ │ ├── rope_utils.py │ │ │ ├── vision_config.py │ │ │ ├── vision_model.py │ │ │ └── vision_transformer_block.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── monkey_patch.cpython-310.pyc │ │ ├── dense_common.py │ │ ├── kimi_vl.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── npu_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py │ ├── protocol.py │ ├── py.typed │ ├── single_controller │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── base │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── decorator.cpython-310.pyc │ │ │ ├── worker.cpython-310.pyc │ │ │ └── worker_group.cpython-310.pyc │ │ ├── decorator.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── base.cpython-310.pyc │ │ └── base.py │ ├── third_party │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ ├── torch │ │ ├── __init__.py │ │ └── distributed │ │ │ ├── __init__.py │ │ │ ├── _state_dict_utils.py │ │ │ └── checkpoint │ │ │ ├── __init__.py │ │ │ └── state_dict.py │ └── vllm │ │ ├── __init__.py │ │ └── __pycache__ │ │ └── __init__.cpython-310.pyc │ ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── geo3k_tool.py │ ├── gsm8k_tool.py │ ├── image_zoom_in_tool.py │ ├── mcp_base_tool.py │ ├── mcp_search_tool.py │ ├── sandbox_fusion_tools.py │ ├── schemas.py │ ├── search_tool.py │ └── utils │ │ ├── __init__.py │ │ ├── mcp_clients │ │ ├── McpClientManager.py │ │ └── utils.py │ │ ├── search_r1_like_utils.py │ │ └── tool_registry.py │ ├── trainer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── constants_ppo.cpython-310.pyc │ │ └── main_ppo.cpython-310.pyc │ ├── config │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── algorithm.cpython-310.pyc │ │ │ └── config.cpython-310.pyc │ │ ├── _generated_ppo_megatron_trainer.yaml │ │ ├── _generated_ppo_trainer.yaml │ │ ├── actor │ │ │ ├── actor.yaml │ │ │ ├── dp_actor.yaml │ │ │ └── megatron_actor.yaml │ │ ├── algorithm.py │ │ ├── config.py │ │ ├── critic │ │ │ ├── critic.yaml │ │ │ ├── dp_critic.yaml │ │ │ └── megatron_critic.yaml │ │ ├── data │ │ │ └── legacy_data.yaml │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── npu_profile │ │ │ └── npu_profile.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ ├── ref │ │ │ ├── dp_ref.yaml │ │ │ ├── megatron_ref.yaml │ │ │ └── ref.yaml │ │ ├── reward_model │ │ │ ├── dp_reward_model.yaml │ │ │ ├── megatron_reward_model.yaml │ │ │ └── reward_model.yaml │ │ ├── rollout │ │ │ └── rollout.yaml │ │ └── sft_trainer.yaml │ ├── constants_ppo.py │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── core_algos.cpython-310.pyc │ │ │ ├── metric_utils.cpython-310.pyc │ │ │ ├── ray_trainer.cpython-310.pyc │ │ │ ├── ray_trainer_coreward.cpython-310.pyc │ │ │ ├── reward.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ ├── ray_trainer_coreward.py │ │ ├── reward.py │ │ └── utils.py │ └── runtime_env.yaml │ ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── activation_offload.cpython-310.pyc │ │ ├── config.cpython-310.pyc │ │ ├── device.cpython-310.pyc │ │ ├── flops_counter.cpython-310.pyc │ │ ├── fs.cpython-310.pyc │ │ ├── fsdp_utils.cpython-310.pyc │ │ ├── hdfs_io.cpython-310.pyc │ │ ├── import_utils.cpython-310.pyc │ │ ├── logging_utils.cpython-310.pyc │ │ ├── memory_utils.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── py_functional.cpython-310.pyc │ │ ├── ray_utils.cpython-310.pyc │ │ ├── rollout_skip.cpython-310.pyc │ │ ├── seqlen_balancing.cpython-310.pyc │ │ ├── tokenizer.cpython-310.pyc │ │ ├── torch_dtypes.cpython-310.pyc │ │ ├── torch_functional.cpython-310.pyc │ │ ├── tracking.cpython-310.pyc │ │ └── ulysses.cpython-310.pyc │ ├── activation_offload.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── checkpoint_manager.cpython-310.pyc │ │ │ └── fsdp_checkpoint_manager.cpython-310.pyc │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── pair_dataset.cpython-310.pyc │ │ │ ├── rl_dataset.cpython-310.pyc │ │ │ ├── rm_dataset.cpython-310.pyc │ │ │ └── sft_dataset.cpython-310.pyc │ │ ├── multiturn_sft_dataset.py │ │ ├── pair_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-310.pyc │ │ ├── metrics.py │ │ ├── performance.py │ │ └── trajectory_tracker.py │ ├── device.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── kernel │ │ ├── __init__.py │ │ ├── kernels.py │ │ └── linear_cross_entropy.py │ ├── logger │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── aggregate_logger.cpython-310.pyc │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── dist_checkpointing.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── memory_utils.py │ ├── metric │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── utils.cpython-310.pyc │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── profiler │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── config.cpython-310.pyc │ │ │ ├── performance.cpython-310.pyc │ │ │ └── profile.cpython-310.pyc │ │ ├── config.py │ │ ├── empty_annotations.py │ │ ├── mstx_profile.py │ │ ├── nvtx_profile.py │ │ ├── performance.py │ │ └── profile.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── math.cpython-310.pyc │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ ├── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── search_r1_like_qa_em.py │ ├── rollout_skip.py │ ├── rollout_trace.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── transformers_compat.py │ ├── ulysses.py │ └── vllm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── patch.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ │ ├── patch.py │ │ └── utils.py │ ├── version │ └── version │ └── workers │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ └── fsdp_workers.cpython-310.pyc │ ├── actor │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── base.cpython-310.pyc │ │ └── dp_actor.cpython-310.pyc │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── config │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── critic.cpython-310.pyc │ │ ├── engine.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── optimizer.cpython-310.pyc │ │ └── rollout.cpython-310.pyc │ ├── actor.py │ ├── critic.py │ ├── engine.py │ ├── model.py │ ├── optimizer.py │ └── rollout.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── engine │ ├── __init__.py │ ├── base.py │ ├── fsdp │ │ ├── __init__.py │ │ ├── engine_impl.py │ │ └── utils.py │ └── megatron │ │ ├── __init__.py │ │ └── engine_impl.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── abstract.cpython-310.pyc │ │ ├── batch.cpython-310.pyc │ │ ├── co_rewarding_II.cpython-310.pyc │ │ ├── dapo.cpython-310.pyc │ │ ├── majority_voting.cpython-310.pyc │ │ ├── naive.cpython-310.pyc │ │ ├── naive_train.cpython-310.pyc │ │ ├── prime.cpython-310.pyc │ │ ├── ref_policy_voting.cpython-310.pyc │ │ ├── ref_voting.cpython-310.pyc │ │ ├── ref_voting_sampler.cpython-310.pyc │ │ └── registry.cpython-310.pyc │ ├── abstract.py │ ├── batch.py │ ├── co_rewarding_II.py │ ├── dapo.py │ ├── majority_voting.py │ ├── naive.py │ ├── naive_train.py │ ├── prime.py │ └── registry.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── roles │ ├── __init__.py │ ├── actor.py │ └── critic.py │ ├── rollout │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── base.cpython-310.pyc │ │ ├── hf_rollout.cpython-310.pyc │ │ └── rollout_worker.cpython-310.pyc │ ├── async_server.py │ ├── base.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── naive_rollout.cpython-310.pyc │ │ └── naive_rollout.py │ ├── rollout_worker.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_server.py │ │ ├── sglang_rollout.py │ │ └── utils.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── vllm_rollout_spmd.cpython-310.pyc │ │ ├── vllm_async_server.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── base.cpython-310.pyc │ ├── fsdp_ulysses.cpython-310.pyc │ └── fsdp_vllm.cpython-310.pyc │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── megatron_sglang.py │ └── megatron_vllm.py ├── README.md ├── figs └── Method.png ├── paper └── Co-rewarding.pdf └── scripts └── install_env.sh /Co-rewarding-I/data/math/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/test.parquet -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/train.parquet -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/train_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/train_original.parquet -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/train_pairs.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/train_pairs.parquet -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/train_rewrite_Qwen3-32B.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/train_rewrite_Qwen3-32B.jsonl -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/train_rewrite_Qwen3-32B.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/train_rewrite_Qwen3-32B.parquet -------------------------------------------------------------------------------- /Co-rewarding-I/data/math/upload_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/data/math/upload_file.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/gsm8k_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/gsm8k_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/gsm8k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/gsm8k_multiturn_w_tool.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /Co-rewarding-I/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/pyproject.toml -------------------------------------------------------------------------------- /Co-rewarding-I/rewrite_questions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/rewrite_questions.py -------------------------------------------------------------------------------- /Co-rewarding-I/run_corewarding-I.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/run_corewarding-I.sh -------------------------------------------------------------------------------- /Co-rewarding-I/scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /Co-rewarding-I/scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/scripts/diagnose.py -------------------------------------------------------------------------------- /Co-rewarding-I/scripts/install_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/scripts/install_env.sh -------------------------------------------------------------------------------- /Co-rewarding-I/scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/scripts/model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-I/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/setup.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/README.md -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/llama_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/llama_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/llama_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/checkpoint_utils/llama_saver.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/util.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/registry.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/qwen2_5_vl.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/protocol.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/config.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/llm.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_5_4/worker.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/config.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/llm.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/third_party/vllm/vllm_v_0_6_3/worker.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/tools/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/tools/base_tool.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/tools/schemas.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/checkpoint/megatron_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/checkpoint/megatron_checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/config.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/pair_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/pair_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/debug/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/debug/profile.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/distributed.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/fs.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/import_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/model.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/net_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/py_functional.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/sandbox_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/sandbox_fusion/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/reward_score/sandbox_fusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/reward_score/sandbox_fusion/utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/tracking.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/ulysses.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/utils/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/utils/vllm_utils.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.3.1.dev 2 | -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/actor/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/critic/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/co_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/co_reward.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/async_server.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/sglang_rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/sglang_rollout/async_sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/sglang_rollout/async_sglang_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/sglang_rollout/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/sglang_rollout/sglang_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/vllm_rollout/fire_vllm_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/fsdp_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/fsdp_sglang.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/megatron_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/megatron_sglang.py -------------------------------------------------------------------------------- /Co-rewarding-I/verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-I/verl/workers/sharding_manager/megatron_vllm.py -------------------------------------------------------------------------------- /Co-rewarding-II/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/data/AMC-TTT/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/AMC-TTT/test.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/AMC-TTT/test_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/AMC-TTT/test_original.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/AMC-TTT/test_rewrite_Qwen3-32B.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/AMC-TTT/test_rewrite_Qwen3-32B.jsonl -------------------------------------------------------------------------------- /Co-rewarding-II/data/AMC-TTT/test_rewrite_Qwen3-32B.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/AMC-TTT/test_rewrite_Qwen3-32B.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/train.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/train_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/train_original.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/train_pairs.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/train_pairs.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/train_rewrite_Qwen3-32B.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/train_rewrite_Qwen3-32B.jsonl -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/train_rewrite_Qwen3-32B.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/train_rewrite_Qwen3-32B.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/dapo/upload_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/dapo/upload_file.py -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/test.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/test.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/train.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/train_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/train_original.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/train_pairs.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/train_pairs.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/train_rewrite_Qwen3-32B.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/train_rewrite_Qwen3-32B.jsonl -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/train_rewrite_Qwen3-32B.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/train_rewrite_Qwen3-32B.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/math/upload_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/math/upload_file.py -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/train.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/train.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/train_original.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/train_original.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/train_pairs.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/train_pairs.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/train_rewrite_Qwen3-32B.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/train_rewrite_Qwen3-32B.jsonl -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/train_rewrite_Qwen3-32B.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/train_rewrite_Qwen3-32B.parquet -------------------------------------------------------------------------------- /Co-rewarding-II/data/open-rs/upload_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/data/open-rs/upload_file.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/aime2024_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/aime2024_multiturn_w_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/amc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/amc.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/dapo17ken_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/dapo17ken_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/dapo_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/dapo_multiturn_w_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/geo3k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/geo3k_multiturn_w_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/gsm8k_multiturn_w_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/gsm8k_multiturn_w_interaction.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/gsm8k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/gsm8k_multiturn_w_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/gsm8k_tool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/gsm8k_tool_agent_loop.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/open-rs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/open-rs.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/data_preprocess/preprocess_search_r1_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/data_preprocess/preprocess_search_r1_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/generation/run_deepseek7b_mutli_node.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gmpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gmpo_trainer/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gmpo_trainer/run_qwen2_5-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gmpo_trainer/run_qwen2_5-7b_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gmpo_trainer/test_dapo_7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gmpo_trainer/test_dapo_7b_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gmpo_trainer/test_dapo_qwen3_30b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gmpo_trainer/test_dapo_qwen3_30b_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gpg_trainer/gpg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gpg_trainer/gpg.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gpg_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gpg_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/gpg_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/gpg_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_minicpmo2_6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_minicpmo2_6.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_moonlight16b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_moonlight16b_math_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_32b_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_32b_grpo_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_7b_grpo_e2e_prof_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_7b_grpo_e2e_prof_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b-megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b-megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b-sglang.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b-sglang.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl-7b_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen3-235b_megatron_96gb.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen3-235b_megatron_96gb.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen3-8b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/grpo_trainer/run_qwen3moe-30b_megatron_96gb.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance_nsys.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance_nsys.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /Co-rewarding-II/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/rloo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_deepseek_6b7.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_gemma_2b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_gemma_7b.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_qwen3_8b_sft_peft_sp2_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_qwen3_8b_sft_peft_sp2_npu.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_peft.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sft/multiturn/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sglang_multiturn/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/sglang_multiturn/run_qwen3_4b_dapo_multiturn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/sglang_multiturn/run_qwen3_4b_dapo_multiturn.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/slurm/ray_on_slurm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/slurm/ray_on_slurm.slurm -------------------------------------------------------------------------------- /Co-rewarding-II/examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/split_placement/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/examples/split_placement/config/ppo_trainer_split.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/split_placement/config/ppo_trainer_split.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /Co-rewarding-II/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /Co-rewarding-II/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/requirements.txt -------------------------------------------------------------------------------- /Co-rewarding-II/run_corewarding-II.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/run_corewarding-II.sh -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/diagnose.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/generate_trainer_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/generate_trainer_config.sh -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/init_random_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/init_random_model.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/install_vllm_sglang_mcore.sh -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/legacy_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/legacy_model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/print_cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/print_cfg.py -------------------------------------------------------------------------------- /Co-rewarding-II/scripts/rollout_viewer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/scripts/rollout_viewer.py -------------------------------------------------------------------------------- /Co-rewarding-II/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/setup.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/__pycache__/base_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/__pycache__/base_config.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/__pycache__/protocol.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/__pycache__/protocol.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/base_config.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/agent_loop/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/agent_loop/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/agent_loop/agent_loop.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/agent_loop/single_turn_agent_loop.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/agent_loop/tool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/agent_loop/tool_agent_loop.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/agent_loop/tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/agent_loop/tool_parser.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/dataset/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/dataset/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/dataset/sampler.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/dynamic_dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/dynamic_dataset/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/experimental/dynamic_dataset/dynamicgen_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/experimental/dynamic_dataset/dynamicgen_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/interactions/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/interactions/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/interactions/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/interactions/gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/interactions/gsm8k_interaction.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/interactions/utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/interactions/utils/interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/interactions/utils/interaction_registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/__main__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/__pycache__/__main__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/__pycache__/__main__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/base_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/base_model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/fsdp_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/fsdp_model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/model_merger/megatron_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/model_merger/megatron_model_merger.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/__pycache__/registry.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/__pycache__/registry.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/mbridge.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/model_forward_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/model_forward_fused.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/patch_v012.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/patch_v012.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/attention.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/model.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/rope_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_config.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_model.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/qwen2_5_vl/vision_transformer_block.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/util.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/dense_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/dense_common.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/kimi_vl.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/npu_patch.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/qwen2_5_vl.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/protocol.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/torch/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/torch/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/torch/distributed/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/torch/distributed/_state_dict_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/torch/distributed/_state_dict_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/base_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/geo3k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/geo3k_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/image_zoom_in_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/image_zoom_in_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/mcp_base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/mcp_base_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/mcp_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/mcp_search_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/sandbox_fusion_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/sandbox_fusion_tools.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/schemas.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/search_tool.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/utils/mcp_clients/McpClientManager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/utils/mcp_clients/McpClientManager.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/utils/mcp_clients/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/utils/mcp_clients/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/utils/search_r1_like_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/utils/search_r1_like_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/tools/utils/tool_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/tools/utils/tool_registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/__pycache__/constants_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/__pycache__/constants_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/__pycache__/main_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/__pycache__/main_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/_generated_ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/_generated_ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/_generated_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/_generated_ppo_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/actor/actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/actor/actor.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/actor/dp_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/actor/dp_actor.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/actor/megatron_actor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/actor/megatron_actor.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/algorithm.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/config.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/critic/critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/critic/critic.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/critic/dp_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/critic/dp_critic.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/critic/megatron_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/critic/megatron_critic.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/data/legacy_data.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/data/legacy_data.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/npu_profile/npu_profile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/npu_profile/npu_profile.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/ref/dp_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/ref/dp_ref.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/ref/megatron_ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/ref/megatron_ref.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/ref/ref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/ref/ref.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/reward_model/dp_reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/reward_model/dp_reward_model.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/reward_model/reward_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/reward_model/reward_model.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/rollout/rollout.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/rollout/rollout.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/constants_ppo.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__pycache__/core_algos.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__pycache__/ray_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__pycache__/reward.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/ray_trainer_coreward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/ray_trainer_coreward.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/ppo/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/ppo/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/device.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/device.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/flops_counter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/flops_counter.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/fs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/fs.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/fsdp_utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/hdfs_io.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/hdfs_io.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/import_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/import_utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/memory_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/memory_utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/py_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/py_functional.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/ray_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/ray_utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/rollout_skip.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/rollout_skip.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/seqlen_balancing.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/tokenizer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/tokenizer.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/torch_dtypes.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/torch_functional.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/torch_functional.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/tracking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/tracking.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/__pycache__/ulysses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/__pycache__/ulysses.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/activation_offload.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/checkpoint/megatron_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/checkpoint/megatron_checkpoint_manager.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/config.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/pair_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/pair_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/debug/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/debug/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/debug/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/debug/metrics.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/device.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/distributed.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/fs.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/import_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/kernel/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/kernel/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/kernel/kernels.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/kernel/linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/kernel/linear_cross_entropy.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/logger/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/logger/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/dist_checkpointing.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/memory_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/memory_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/metric/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/metric/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/metric/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/metric/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/model.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/net_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/__pycache__/profile.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/__pycache__/profile.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/config.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/empty_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/empty_annotations.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/mstx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/mstx_profile.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/nvtx_profile.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/performance.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/profiler/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/profiler/profile.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/py_functional.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/__pycache__/math.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/__pycache__/math.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_code/README.md -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/sandbox_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/sandbox_fusion/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/sandbox_fusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/sandbox_fusion/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/reward_score/search_r1_like_qa_em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/reward_score/search_r1_like_qa_em.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/rollout_skip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/rollout_skip.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/rollout_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/rollout_trace.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/tracking.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/transformers_compat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/transformers_compat.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/ulysses.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/__pycache__/patch.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/__pycache__/patch.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/patch.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/utils/vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/utils/vllm/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.5.0.dev 2 | -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/__pycache__/fsdp_workers.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/__pycache__/dp_actor.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/critic.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/engine.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/__pycache__/rollout.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/__pycache__/rollout.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/actor.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/critic.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/engine.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/model.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/optimizer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/config/rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/config/rollout.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/critic/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/fsdp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/fsdp/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/fsdp/engine_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/fsdp/engine_impl.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/fsdp/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/fsdp/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/engine/megatron/engine_impl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/engine/megatron/engine_impl.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/abstract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/abstract.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/co_rewarding_II.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/co_rewarding_II.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/majority_voting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/majority_voting.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/naive_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/naive_train.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_manager/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_manager/registry.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/roles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/roles/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/roles/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/roles/actor.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/roles/critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/roles/critic.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/async_server.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/rollout_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/rollout_worker.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/sglang_rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/sglang_rollout/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/sglang_rollout/sglang_rollout.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/sglang_rollout/utils.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/fsdp_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/fsdp_sglang.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/megatron_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/megatron_sglang.py -------------------------------------------------------------------------------- /Co-rewarding-II/verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/Co-rewarding-II/verl/workers/sharding_manager/megatron_vllm.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/README.md -------------------------------------------------------------------------------- /figs/Method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/figs/Method.png -------------------------------------------------------------------------------- /paper/Co-rewarding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/paper/Co-rewarding.pdf -------------------------------------------------------------------------------- /scripts/install_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmlr-group/Co-rewarding/HEAD/scripts/install_env.sh --------------------------------------------------------------------------------