├── README.md ├── all_embd_to_openai.py ├── all_to_tool_call ├── README.md ├── all_to_tool_call.py └── test.ipynb ├── code-r1 ├── .gemini │ └── config.yaml ├── .github │ ├── CODEOWNERS │ ├── PULL_REQUEST_TEMPLATE.md │ ├── dependabot.yml │ └── workflows │ │ ├── check-pr-title.yml │ │ ├── checkpoint_converter.yml │ │ ├── cpu_unit_tests.yml │ │ ├── disabled │ │ └── e2e_prime.yml │ │ ├── doc.yml │ │ ├── e2e_ascend.yml │ │ ├── e2e_dapo.yml │ │ ├── e2e_eval_aime24.yml │ │ ├── e2e_genrm_remote.yml │ │ ├── e2e_ppo_trainer.yml │ │ ├── e2e_ppo_trainer_megatron_sglang.yml │ │ ├── e2e_ppo_trainer_megatron_vllm.yml │ │ ├── e2e_sft.yml │ │ ├── e2e_spin.yml │ │ ├── e2e_sppo.yml │ │ ├── gpu_unit_tests.yml │ │ ├── model.yml │ │ ├── pre-commit-full.yml │ │ ├── pre-commit.yml │ │ ├── sanity.yml │ │ ├── scorecard.yml │ │ ├── secrets_scan.yml │ │ ├── sgl.yml │ │ ├── type-coverage-check.yml │ │ └── vllm.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Notice.txt ├── README.md ├── code-r1.png ├── data │ └── data_process.ipynb ├── docker │ ├── Apptainerfile.rocm │ ├── Dockerfile.extention.awsefa │ ├── Dockerfile.ngc.vllm │ ├── Dockerfile.ngc.vllm0.8 │ ├── Dockerfile.ngc.vllm0.8.sagemaker │ ├── Dockerfile.rocm │ ├── Dockerfile.sglang │ ├── Dockerfile.vemlp.vllm.te │ ├── Dockerfile.vllm.sglang.megatron.deepseek │ ├── README.md │ ├── verl0.4-cu124-torch2.6-fa2.7.4 │ │ ├── Dockerfile.app.sglang.vllm.mcore0.12 │ │ ├── Dockerfile.app.sglang.vllm.mcore0.13.preview │ │ ├── Dockerfile.app.vllm.mcore0.12 │ │ ├── Dockerfile.app.vllm.mcore0.13.preview │ │ ├── Dockerfile.base │ │ └── README.md │ ├── verl0.5-cu126-torch2.7.1-fa2.8.0 │ │ ├── Dockerfile.app.sglang.mcore0.12 │ │ ├── Dockerfile.app.sglang.mcore0.13.preview │ │ ├── Dockerfile.base │ │ └── README.md │ └── verl0.5-preview-cu128-torch2.7.1-fa2.8.0 │ │ ├── Dockerfile.app.sglang.megatron │ │ ├── Dockerfile.base │ │ └── README.md ├── docs │ ├── Makefile │ ├── README.md │ ├── README_vllm0.7.md │ ├── README_vllm0.8.md │ ├── _static │ │ ├── js │ │ │ └── runllm-widget.js │ │ └── logo.png │ ├── advance │ │ ├── checkpoint.rst │ │ ├── dpo_extension.rst │ │ ├── fsdp_extension.rst │ │ ├── megatron_extension.rst │ │ ├── placement.rst │ │ ├── ppo_lora.rst │ │ └── rope.rst │ ├── algo │ │ ├── baseline.md │ │ ├── dapo.md │ │ ├── entropy.md │ │ ├── gpg.md │ │ ├── grpo.md │ │ ├── opo.md │ │ ├── ppo.md │ │ ├── spin.md │ │ └── sppo.md │ ├── amd_tutorial │ │ ├── amd_build_dockerfile_page.rst │ │ └── amd_vllm_page.rst │ ├── api │ │ ├── data.rst │ │ ├── single_controller.rst │ │ ├── trainer.rst │ │ └── utils.rst │ ├── ascend_tutorial │ │ └── ascend_quick_start.rst │ ├── conf.py │ ├── examples │ │ ├── config.rst │ │ ├── gsm8k_example.rst │ │ ├── multi_modal_example.rst │ │ ├── ppo_code_architecture.rst │ │ └── sandbox_fusion_example.rst │ ├── faq │ │ └── faq.rst │ ├── hybrid_flow.rst │ ├── index.rst │ ├── perf │ │ ├── device_tuning.rst │ │ ├── dpsk.md │ │ ├── nsight_profiling.md │ │ └── perf_tuning.rst │ ├── preparation │ │ ├── prepare_data.rst │ │ └── reward_function.rst │ ├── requirements-docs.txt │ ├── sglang_multiturn │ │ ├── interaction_system.rst │ │ ├── multiturn.rst │ │ ├── sandbox_fusion.rst │ │ └── search_tool_example.rst │ ├── single_controller.rst │ ├── start │ │ ├── install.rst │ │ ├── more_resources.rst │ │ ├── multinode.rst │ │ ├── quickstart.rst │ │ └── ray_debug_tutorial.rst │ └── workers │ │ ├── fsdp_workers.rst │ │ ├── megatron_workers.rst │ │ ├── ray_trainer.rst │ │ └── sglang_worker.rst ├── examples │ ├── data_preprocess │ │ ├── aime2024_multiturn_w_tool.py │ │ ├── dapo_multiturn_w_tool.py │ │ ├── full_hh_rlhf.py │ │ ├── geo3k.py │ │ ├── geo3k_multiturn_w_tool.py │ │ ├── gsm8k.py │ │ ├── gsm8k_multiturn_w_interaction.py │ │ ├── gsm8k_multiturn_w_tool.py │ │ ├── hellaswag.py │ │ ├── math_dataset.py │ │ ├── multiturn.py │ │ └── preprocess_search_r1_dataset.py │ ├── generation │ │ ├── run_deepseek7b_mutli_node.sh │ │ └── run_deepseek_v2_lite_math.sh │ ├── gpg_trainer │ │ ├── gpg.md │ │ ├── run_qwen2-7b_math.sh │ │ └── run_qwen2-7b_math_megatron.sh │ ├── grpo_trainer │ │ ├── README.md │ │ ├── run_deepseek671b_math_megatron.sh │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_math.sh │ │ ├── run_deepseek7b_llm_math_megatron.sh │ │ ├── run_deepseek7b_llm_seq_balance.sh │ │ ├── run_minicpmo2_6.sh │ │ ├── run_moonlight16b_math_megatron.sh │ │ ├── run_qwen2-7b.sh │ │ ├── run_qwen2-7b_math.sh │ │ ├── run_qwen2-7b_math_megatron.sh │ │ ├── run_qwen2-7b_seq_balance.sh │ │ ├── run_qwen2-7b_seq_balance_math_megatron.sh │ │ ├── run_qwen2-7b_sgl_megatron.sh │ │ ├── run_qwen2_5-3b_gsm8k_grpo_lora.sh │ │ ├── run_qwen2_5-7b_math_megatron_diff_tp.sh │ │ ├── run_qwen2_5_32b_grpo_npu.sh │ │ ├── run_qwen2_5_7b_grpo_npu.sh │ │ ├── run_qwen2_5_vl-7b-megatron.sh │ │ ├── run_qwen2_5_vl-7b.sh │ │ ├── run_qwen2_5_vl-7b_lora.sh │ │ ├── run_qwen2_5_vl-7b_seq_balance.sh │ │ ├── run_qwen2_5_vl_32b_npu.sh │ │ ├── run_qwen2_5_vl_3b_npu.sh │ │ ├── run_qwen2_5_vl_7b_npu.sh │ │ ├── run_qwen3-236b_megatron.sh │ │ ├── run_qwen3-8b.sh │ │ └── run_qwen3moe-30b_megatron.sh │ ├── ppo_trainer │ │ ├── README.md │ │ ├── run_deepseek7b_llm.sh │ │ ├── run_deepseek7b_llm_modelscope.sh │ │ ├── run_deepseek7b_llm_pfppo.sh │ │ ├── run_deepseek7b_llm_sandbox_fusion.sh │ │ ├── run_deepseek7b_llm_sp2.sh │ │ ├── run_deepseek_full_hh_rlhf.sh │ │ ├── run_deepseek_math_gsm8k_megatron.sh │ │ ├── run_deepseek_math_gsm8k_megatron_nsys.sh │ │ ├── run_gemma.sh │ │ ├── run_moonlight16b_a3b_gsm8k_megatron.sh │ │ ├── run_qwen1.5_moe_a2.7b-gsm8k_megatron.sh │ │ ├── run_qwen2-7b_math_gsm8k_megatron.sh │ │ ├── run_qwen2-7b_rm.sh │ │ ├── run_qwen2-7b_rm_seq_balance.sh │ │ ├── run_qwen2-7b_rm_seq_balance_fused_kernels.sh │ │ ├── run_qwen2-7b_rm_seq_balance_nsys.sh │ │ ├── run_qwen2-7b_seq_balance.sh │ │ ├── run_qwen2-7b_sglang_seq_balance.sh │ │ └── run_qwen2.5-32b.sh │ ├── ray │ │ └── tutorial.ipynb │ ├── reinforce_plus_plus_trainer │ │ ├── run_qwen2-7b_math_rf.sh │ │ └── run_qwen2-7b_math_rf_baseline.sh │ ├── remax_trainer │ │ ├── run_qwen2.5-3b_seq_balance.sh │ │ └── run_qwen2.5-7b_seq_balance.sh │ ├── rloo_trainer │ │ └── run_qwen2-7b.sh │ ├── sft │ │ ├── gsm8k │ │ │ ├── run_deepseek_6b7.sh │ │ │ ├── run_gemma_2b.sh │ │ │ ├── run_gemma_7b.sh │ │ │ ├── run_qwen2_5_05b_sft_peft_sp2_npu.sh │ │ │ ├── run_qwen_05_peft.sh │ │ │ ├── run_qwen_05_sp2.sh │ │ │ └── run_qwen_05_sp2_liger.sh │ │ └── multiturn │ │ │ └── run_qwen_05_sp2.sh │ ├── sglang_multiturn │ │ ├── README.md │ │ ├── config │ │ │ ├── geo3k_multiturn_grpo.yaml │ │ │ ├── geo3k_multiturn_megatron_grpo.yaml │ │ │ ├── gsm8k_multiturn_grpo.yaml │ │ │ ├── gsm8k_multiturn_grpo_w_interaction.yaml │ │ │ ├── gsm8k_multiturn_megatron_grpo.yaml │ │ │ ├── interaction_config │ │ │ │ └── gsm8k_interaction_config.yaml │ │ │ ├── retool_multiturn_grpo.yaml │ │ │ ├── search_multiturn_grpo.yaml │ │ │ └── tool_config │ │ │ │ ├── geo3k_tool_config.yaml │ │ │ │ ├── gsm8k_tool_config.yaml │ │ │ │ ├── mcp_server.json │ │ │ │ ├── mcp_tool_config.yaml │ │ │ │ ├── sandbox_fusion_tool_config.yaml │ │ │ │ └── search_tool_config.yaml │ │ ├── geo3k │ │ │ ├── run_qwen2.5-3b_geo3k_multiturn.sh │ │ │ ├── run_qwen2.5-3b_geo3k_multiturn_4xgpu.sh │ │ │ └── run_qwen2.5-3b_megatron_geo3k_multiturn.sh │ │ ├── run_qwen0.5b_gsm8k_multiturn_curriculum.sh │ │ ├── run_qwen2.5-0.5b_gsm8k_multiturn_w_interaction.sh │ │ ├── run_qwen2.5-3b_gsm8k_multiturn.sh │ │ ├── run_qwen2.5-3b_gsm8k_multiturn_4xgpu.sh │ │ ├── run_qwen2.5-3b_megatron_gsm8k_multiturn.sh │ │ ├── run_qwen3-4b_gsm8k_multiturn.sh │ │ └── search_r1_like │ │ │ ├── local_dense_retriever │ │ │ ├── download.py │ │ │ └── retrieval_server.py │ │ │ └── run_qwen2.5-3b_instruct_search_multiturn.sh │ ├── slurm │ │ └── ray_on_slurm.slurm │ ├── split_placement │ │ ├── README.md │ │ ├── config │ │ │ └── ppo_trainer_split.yaml │ │ ├── main_ppo_split.py │ │ ├── run_deepseek7b_llm.sh │ │ └── split_monkey_patch.py │ └── tuning │ │ ├── 0.5b │ │ └── qwen2-0.5b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 1.5b │ │ └── qwen2-1.5b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 14b │ │ ├── qwen2-14b_grpo-lora_2_h100_fsdp_vllm.sh │ │ └── qwen2_14b_grpo_4_h800_fsdp_vllm.sh │ │ ├── 32b │ │ ├── qwen2-32b_grpo-lora_4_h100_fsdp_vllm.sh │ │ └── qwen2_32B_grpo_8_h20_megatron_vllm.sh │ │ ├── 3b │ │ └── qwen2-3b_grpo-lora_1_h100_fsdp_vllm.sh │ │ ├── 70b │ │ ├── qwen2-70b_grpo_32_h20_fsdp_vllm.sh │ │ ├── qwen2-70b_grpo_32_h800_fsdp_vllm.sh │ │ └── qwen2-72b_grpo-lora_8_h100_fsdp_vllm.sh │ │ └── 7b │ │ ├── qwen2-7b_grpo-lora_1_h100_fsdp_vllm.sh │ │ └── qwen2-7b_grpo_2_h800_fsdp_vllm.sh ├── my_reward │ └── code.py ├── pyproject.toml ├── recipe │ ├── README.md │ ├── char_count │ │ ├── README.md │ │ ├── create_dataset.py │ │ ├── reward_function.py │ │ ├── train_grpo.sh │ │ └── train_sft.sh │ ├── dapo │ │ ├── README.md │ │ ├── config │ │ │ └── dapo_trainer.yaml │ │ ├── dapo_ray_trainer.py │ │ ├── main_dapo.py │ │ ├── prepare_dapo_data.sh │ │ ├── run_dapo_early_qwen2.5_32b.sh │ │ ├── run_dapo_qwen2.5_32b.sh │ │ ├── run_dapo_wo_ds_qwen2.5_32b.sh │ │ ├── runtime_env.yaml │ │ ├── test_dapo_7b.sh │ │ ├── test_dapo_7b_math.sh │ │ ├── test_dapo_7b_math_lora.sh │ │ ├── test_dapo_7b_math_megatron.sh │ │ ├── test_dapo_dspk_671b_megatron.sh │ │ └── test_dapo_qwen3_30b_math.sh │ ├── entropy │ │ ├── 32b_clip_cov.sh │ │ ├── 32b_kl_cov.sh │ │ ├── 32b_kl_cov_mininbsz.sh │ │ ├── 7b_clip_cov.sh │ │ ├── 7b_kl_cov.sh │ │ ├── README.md │ │ ├── config │ │ │ └── entropy_trainer.yaml │ │ ├── entropy_ray_trainer.py │ │ ├── main_entropy.py │ │ ├── reward.py │ │ └── reward_score │ │ │ ├── __init__.py │ │ │ └── entropy_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ ├── genrm_remote │ │ ├── README.md │ │ ├── reward_function.py │ │ └── run_genrm_remote.sh │ ├── minicpmo │ │ └── rl_dataset.py │ ├── prime │ │ ├── __init__.py │ │ ├── config │ │ │ └── prime_trainer.yaml │ │ ├── main_prime.py │ │ ├── prime_core_algos.py │ │ ├── prime_dp_rm.py │ │ ├── prime_fsdp_workers.py │ │ ├── prime_ray_trainer.py │ │ ├── run_prime_qwen.sh │ │ └── run_prime_qwen_code.sh │ ├── r1 │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ │ └── evaluation.yaml │ │ ├── data_process.py │ │ ├── main_eval.py │ │ ├── reward_score.py │ │ ├── run_r1_distill_qwen.sh │ │ └── tasks │ │ │ ├── __init__.py │ │ │ ├── gpqa.py │ │ │ ├── livecodebench.py │ │ │ └── math.py │ ├── retool │ │ ├── retool.py │ │ ├── retool_multi_turn_sft_preprocess.py │ │ ├── retool_sft_preprocess.py │ │ ├── run_qwen2-32b_sft.sh │ │ ├── run_qwen2.5_32b_sp8.sh │ │ ├── run_qwen2.5_7b_sp4.sh │ │ ├── run_qwen3_4b_sp4.sh │ │ └── sandbox_fusion_tool_config.yaml │ ├── spin │ │ ├── README.md │ │ ├── config │ │ │ └── spin_trainer.yaml │ │ ├── core_algos.py │ │ ├── dp_actor.py │ │ ├── fsdp_workers.py │ │ ├── main_spin.py │ │ ├── run_spin.sh │ │ └── spin_trainer.py │ └── sppo │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ └── sppo_trainer.yaml │ │ ├── dp_actor.py │ │ ├── main_sppo.py │ │ ├── run_qwen2.5-7b_rm.sh │ │ ├── sppo_ray_trainer.py │ │ └── sppo_worker.py ├── requirements-npu.txt ├── requirements.txt ├── requirements_sglang.txt ├── scripts │ ├── __init__.py │ ├── converter_hf_to_mcore.py │ ├── diagnose.py │ ├── init_random_model.py │ ├── install_vllm_sglang_mcore.sh │ └── legacy_model_merger.py ├── search_r1 │ ├── __init__.py │ ├── llm_agent │ │ ├── __init__.py │ │ ├── generation.py │ │ └── tensor_helper.py │ └── search │ │ ├── build_index.sh │ │ ├── google_search_server.py │ │ ├── index_builder.py │ │ ├── rerank_server.py │ │ ├── retrieval.py │ │ ├── retrieval.sh │ │ ├── retrieval_request.py │ │ ├── retrieval_rerank_server.py │ │ ├── retrieval_server.py │ │ └── serp_search_server.py ├── setup.py ├── tests │ ├── README.md │ ├── __init__.py │ ├── experimental │ │ └── agent_loop │ │ │ ├── agent_utils.py │ │ │ └── test_basic_agent_loop.py │ ├── interactions │ │ ├── __init__.py │ │ ├── test_gsm8k_interaction.py │ │ └── test_interaction_registry.py │ ├── kill_github_tests.sh │ ├── models │ │ ├── test_transformer.py │ │ └── test_transformers_ulysses.py │ ├── single_controller │ │ ├── __init__.py │ │ ├── base │ │ │ └── test_decorator.py │ │ ├── check_worker_alive │ │ │ └── main.py │ │ ├── detached_worker │ │ │ ├── README.md │ │ │ ├── client.py │ │ │ ├── run.sh │ │ │ └── server.py │ │ ├── test_auto_padding_on_cpu.py │ │ ├── test_colocated_workers.py │ │ ├── test_colocated_workers_fused.py │ │ ├── test_data_transfer.py │ │ ├── test_decorator_on_cpu.py │ │ ├── test_driverfunc_to_worker.py │ │ ├── test_fused_workers_on_cpu.py │ │ ├── test_high_level_scheduling_api.py │ │ ├── test_ray_collectives.py │ │ ├── test_ray_local_envs_on_cpu.py │ │ ├── test_ray_utils_on_cpu.py │ │ ├── test_rvdz.py │ │ ├── test_worker_group_basics.py │ │ └── test_worker_group_torch.py │ ├── special_distributed │ │ ├── README.md │ │ ├── run_all.sh │ │ ├── test_fsdp_ckpt.py │ │ └── test_tensor_dict.py │ ├── special_e2e │ │ ├── README.md │ │ ├── __init__.py │ │ ├── check_custom_rwd_fn.py │ │ ├── check_results.py │ │ ├── envs │ │ │ ├── __init__.py │ │ │ └── digit_completion │ │ │ │ ├── __init__.py │ │ │ │ ├── task.py │ │ │ │ └── tokenizer.py │ │ ├── generation │ │ │ └── run_gen_qwen05.sh │ │ ├── ppo_trainer │ │ │ ├── expert_parallel │ │ │ │ └── qwen2moe_minimal.json │ │ │ ├── run_function_reward.sh │ │ │ ├── run_model_reward.sh │ │ │ └── run_single_gpu.sh │ │ ├── run_dapo.sh │ │ ├── run_genrm_remote.sh │ │ ├── run_geo3k_fsdp_sgl_multiturn_w_tool.sh │ │ ├── run_grpo_lora_with_merge.sh │ │ ├── run_gsm8k_fsdp_sgl_multiturn_sf_tool.sh │ │ ├── run_gsm8k_fsdp_sgl_multiturn_w_tool.sh │ │ ├── run_ppo_trainer_megatron.sh │ │ ├── run_prime.sh │ │ ├── run_r1_distill_qwen_aime24_eval.sh │ │ ├── run_spin.sh │ │ ├── run_sppo.sh │ │ ├── run_test.sh │ │ └── sft │ │ │ ├── run_sft.sh │ │ │ └── test_sp_loss_match.py │ ├── special_npu │ │ ├── run_qwen2_5_05b_dapo.sh │ │ ├── run_qwen2_5_05b_grpo.sh │ │ ├── run_qwen2_5_05b_sft_peft_sp2.sh │ │ └── run_qwen2_5_vl_3b_npu.sh │ ├── special_sanity │ │ ├── check_api_docs.py │ │ ├── check_device_api_usage.py │ │ ├── check_docs_time_info.py │ │ ├── check_license.py │ │ ├── check_pr_description.py │ │ ├── check_pr_title.py │ │ ├── test_config_docs.py │ │ ├── test_import.py │ │ ├── type_coverage_check.py │ │ ├── validate_imported_docs.py │ │ └── validate_structure.py │ ├── special_standalone │ │ ├── README.md │ │ └── test_memory_buffers.py │ ├── test_base_config_on_cpu.py │ ├── test_protocol_on_cpu.py │ ├── tools │ │ └── test_base_tool_on_cpu.py │ ├── trainer │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── test_algo_config_on_cpu.py │ │ └── ppo │ │ │ ├── __init__.py │ │ │ ├── test_core_algos_on_cpu.py │ │ │ └── test_metric_utils_on_cpu.py │ ├── utils │ │ ├── _test_module.py │ │ ├── ckpt │ │ │ └── test_esi_save_ckpt_on_cpu.py │ │ ├── dataset │ │ │ ├── test_create_rl_sampler_on_cpu.py │ │ │ ├── test_multiturn_sft_dataset_on_cpu.py │ │ │ ├── test_rl_dataset_on_cpu.py │ │ │ └── test_sft_dataset_on_cpu.py │ │ ├── megatron │ │ │ └── test_pipeline_parallel.py │ │ ├── reward_score │ │ │ ├── reward_score │ │ │ │ └── test_sandbox_fusion_on_cpu.py │ │ │ └── test_sandbox_on_cpu.py │ │ ├── test_activation_offload.py │ │ ├── test_config_on_cpu.py │ │ ├── test_flops_counter.py │ │ ├── test_fs_on_cpu.py │ │ ├── test_import_utils_on_cpu.py │ │ ├── test_linear_cross_entropy.py │ │ ├── test_linear_cross_entropy_tp.py │ │ ├── test_model_on_cpu.py │ │ ├── test_nvtx_profile.py │ │ ├── test_seqlen_balancing.py │ │ ├── test_timeout_decorator_cpu.py │ │ └── test_torch_functional.py │ └── workers │ │ ├── reward_manager │ │ └── test_registry_on_cpu.py │ │ └── rollout │ │ ├── async_rollout_utils.py │ │ ├── perf │ │ └── vllm_async_rollout.py │ │ ├── resource │ │ └── tool_configs │ │ │ ├── mcp_server.json │ │ │ ├── mcp_tool_config │ │ │ ├── sandbox_fusion_tool_config │ │ │ └── search_tool_config │ │ ├── rollout_vllm │ │ ├── run_fsdp_vllm.py │ │ ├── test_vllm_chat_scheduler.py │ │ └── test_vllm_spmd.py │ │ ├── test_async_sglang_server.py │ │ ├── test_custom_completion_callback.py │ │ ├── test_hf_rollout.py │ │ ├── test_sglang_async_rollout_mcp_tools.py │ │ ├── test_sglang_async_rollout_multimodal_delta.py │ │ ├── test_sglang_async_rollout_search_tools.py │ │ ├── test_sglang_async_rollout_sf_tools.py │ │ ├── test_sglang_async_rollout_w_interaction.py │ │ ├── test_sglang_async_rollout_w_tools.py │ │ ├── test_sglang_multi_interaction.py │ │ ├── test_sglang_spmd.py │ │ └── utils_sglang.py └── verl │ ├── __init__.py │ ├── base_config.py │ ├── experimental │ ├── __init__.py │ └── agent_loop │ │ ├── __init__.py │ │ ├── agent_loop.py │ │ ├── single_turn_agent_loop.py │ │ └── tool_agent_loop.py │ ├── interactions │ ├── __init__.py │ ├── base.py │ ├── gsm8k_interaction.py │ └── utils │ │ ├── __init__.py │ │ └── interaction_registry.py │ ├── model_merger │ ├── __init__.py │ ├── __main__.py │ ├── base_model_merger.py │ ├── fsdp_model_merger.py │ └── megatron_model_merger.py │ ├── models │ ├── README.md │ ├── __init__.py │ ├── llama │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── llama_loader.py │ │ │ ├── llama_loader_depracated.py │ │ │ └── llama_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_llama_megatron.py │ ├── mcore │ │ ├── __init__.py │ │ ├── config_converter.py │ │ ├── loader.py │ │ ├── mbridge.py │ │ ├── model_forward.py │ │ ├── model_initializer.py │ │ ├── patch_v012.py │ │ ├── qwen2_5_vl │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── model.py │ │ │ ├── rope_utils.py │ │ │ ├── vision_config.py │ │ │ ├── vision_model.py │ │ │ └── vision_transformer_block.py │ │ ├── readme.md │ │ ├── registry.py │ │ ├── saver.py │ │ ├── util.py │ │ └── weight_converter.py │ ├── qwen2 │ │ ├── __init__.py │ │ └── megatron │ │ │ ├── __init__.py │ │ │ ├── checkpoint_utils │ │ │ ├── __init__.py │ │ │ ├── qwen2_loader.py │ │ │ ├── qwen2_loader_depracated.py │ │ │ └── qwen2_saver.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── parallel_attention.py │ │ │ ├── parallel_decoder.py │ │ │ ├── parallel_linear.py │ │ │ ├── parallel_mlp.py │ │ │ └── parallel_rmsnorm.py │ │ │ └── modeling_qwen2_megatron.py │ ├── registry.py │ ├── transformers │ │ ├── __init__.py │ │ ├── dense_common.py │ │ ├── kimi_vl.py │ │ ├── llama.py │ │ ├── monkey_patch.py │ │ ├── npu_patch.py │ │ ├── qwen2.py │ │ ├── qwen2_5_vl.py │ │ └── qwen2_vl.py │ └── weight_loader_registry.py │ ├── protocol.py │ ├── single_controller │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ ├── decorator.py │ │ ├── megatron │ │ │ ├── __init__.py │ │ │ ├── worker.py │ │ │ └── worker_group.py │ │ ├── register_center │ │ │ ├── __init__.py │ │ │ └── ray.py │ │ ├── worker.py │ │ └── worker_group.py │ └── ray │ │ ├── __init__.py │ │ ├── base.py │ │ └── megatron.py │ ├── third_party │ ├── __init__.py │ ├── sglang │ │ ├── __init__.py │ │ └── parallel_state.py │ └── vllm │ │ └── __init__.py │ ├── tools │ ├── __init__.py │ ├── base_tool.py │ ├── geo3k_tool.py │ ├── gsm8k_tool.py │ ├── mcp_base_tool.py │ ├── mcp_search_tool.py │ ├── sandbox_fusion_tools.py │ ├── schemas.py │ ├── search_tool.py │ └── utils │ │ ├── __init__.py │ │ ├── mcp_clients │ │ ├── McpClientManager.py │ │ └── utils.py │ │ ├── search_r1_like_utils.py │ │ └── tool_registry.py │ ├── trainer │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── algorithm.py │ │ ├── evaluation.yaml │ │ ├── generation.yaml │ │ ├── grpo_trainer.yaml │ │ ├── ppo_megatron_trainer.yaml │ │ ├── ppo_trainer.yaml │ │ └── sft_trainer.yaml │ ├── constants_ppo.py │ ├── fsdp_sft_trainer.py │ ├── main_eval.py │ ├── main_generation.py │ ├── main_ppo.py │ ├── ppo │ │ ├── __init__.py │ │ ├── core_algos.py │ │ ├── metric_utils.py │ │ ├── ray_trainer.py │ │ └── reward.py │ └── runtime_env.yaml │ ├── utils │ ├── __init__.py │ ├── activation_offload.py │ ├── checkpoint │ │ ├── __init__.py │ │ ├── checkpoint_manager.py │ │ ├── fsdp_checkpoint_manager.py │ │ └── megatron_checkpoint_manager.py │ ├── config.py │ ├── dataset │ │ ├── README.md │ │ ├── __init__.py │ │ ├── multiturn_sft_dataset.py │ │ ├── rl_dataset.py │ │ ├── rm_dataset.py │ │ ├── sampler.py │ │ ├── sft_dataset.py │ │ └── vision_utils.py │ ├── debug │ │ ├── __init__.py │ │ ├── performance.py │ │ └── trajectory_tracker.py │ ├── device.py │ ├── distributed.py │ ├── experimental │ │ ├── __init__.py │ │ └── torch_functional.py │ ├── flops_counter.py │ ├── fs.py │ ├── fsdp_utils.py │ ├── hdfs_io.py │ ├── import_utils.py │ ├── kernel │ │ ├── __init__.py │ │ ├── kernels.py │ │ └── linear_cross_entropy.py │ ├── logger │ │ ├── __init__.py │ │ └── aggregate_logger.py │ ├── logging_utils.py │ ├── megatron │ │ ├── __init__.py │ │ ├── dist_checkpointing.py │ │ ├── memory.py │ │ ├── optimizer.py │ │ ├── pipeline_parallel.py │ │ ├── sequence_parallel.py │ │ └── tensor_parallel.py │ ├── megatron_utils.py │ ├── memory_buffer.py │ ├── metric │ │ ├── __init__.py │ │ └── utils.py │ ├── model.py │ ├── net_utils.py │ ├── profiler │ │ ├── __init__.py │ │ ├── config.py │ │ ├── empty_annotations.py │ │ ├── nvtx_profile.py │ │ ├── performance.py │ │ └── profile.py │ ├── py_functional.py │ ├── ray_utils.py │ ├── rendezvous │ │ ├── __init__.py │ │ └── ray_backend.py │ ├── reward_score │ │ ├── __init__.py │ │ ├── geo3k.py │ │ ├── gsm8k.py │ │ ├── math.py │ │ ├── math_batch.py │ │ ├── math_dapo.py │ │ ├── math_verify.py │ │ ├── prime_code │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── testing_util.py │ │ │ └── utils.py │ │ ├── prime_math │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ │ ├── sandbox_fusion │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ └── search_r1_like_qa_em.py │ ├── seqlen_balancing.py │ ├── tokenizer.py │ ├── torch_dtypes.py │ ├── torch_functional.py │ ├── tracking.py │ ├── ulysses.py │ └── vllm_utils.py │ ├── version │ └── version │ └── workers │ ├── __init__.py │ ├── actor │ ├── __init__.py │ ├── base.py │ ├── dp_actor.py │ └── megatron_actor.py │ ├── critic │ ├── __init__.py │ ├── base.py │ ├── dp_critic.py │ └── megatron_critic.py │ ├── fsdp_workers.py │ ├── megatron_workers.py │ ├── reward_manager │ ├── __init__.py │ ├── batch.py │ ├── dapo.py │ ├── naive.py │ ├── prime.py │ └── registry.py │ ├── reward_model │ ├── __init__.py │ ├── base.py │ └── megatron │ │ ├── __init__.py │ │ └── reward_model.py │ ├── rollout │ ├── __init__.py │ ├── async_server.py │ ├── base.py │ ├── chat_scheduler.py │ ├── hf_rollout.py │ ├── naive │ │ ├── __init__.py │ │ └── naive_rollout.py │ ├── schemas.py │ ├── sglang_rollout │ │ ├── __init__.py │ │ ├── async_sglang_server.py │ │ ├── sglang_rollout.py │ │ └── utils.py │ ├── tokenizer.py │ └── vllm_rollout │ │ ├── __init__.py │ │ ├── vllm_async_server.py │ │ └── vllm_rollout_spmd.py │ └── sharding_manager │ ├── __init__.py │ ├── base.py │ ├── fsdp_sglang.py │ ├── fsdp_ulysses.py │ ├── fsdp_vllm.py │ ├── megatron_sglang.py │ └── megatron_vllm.py ├── dapo_from_scratch ├── dapo_loss.png ├── grpo_loss.png ├── reward_func.py ├── test.py └── train.py ├── date_modify.ipynb ├── deep_research ├── client.py ├── prompts.py ├── search_mcp.py ├── searxng │ ├── docker-compose.yaml │ └── settings.yml ├── test.log └── test.md ├── deepseek_learn ├── 1.png ├── 2.png ├── 3.png ├── MLA.py ├── MTP_train │ ├── MTP.py │ ├── loss.png │ └── test.py ├── deepseek_r1_train │ ├── deepseek-r1解读.md │ ├── deepseek_r1_train.py │ └── deepseek_reward.png ├── dsa │ ├── README.md │ ├── dataset.py │ ├── get_data.py │ ├── model.py │ ├── train.py │ └── warmup_train.py └── test.ipynb ├── gradio_mcp_client.py ├── grpo_from_scratch ├── grpo_loss.png ├── reward_func.py ├── test.py └── train.py ├── knowledge_distillation_embedding ├── README.md ├── data_process.py ├── dataset.py ├── evaluation.py ├── get_distillation_data_local.py ├── get_distillation_data_openai.py ├── merge.py └── train.py ├── knowledge_distillation_llm ├── README.md ├── dataset.py ├── images │ ├── fkl.png │ ├── fkl_formula.png │ ├── rkl.png │ └── rkl_formula.png ├── on_policy_distillation_train.py ├── on_policy_distillation_train_rl.py ├── train.py └── utils.py ├── knowledge_distillation_llm_cross_tokenizer ├── README.md ├── dataset.py ├── example.json ├── train.py └── utils.py ├── langgraph_agent ├── README.md ├── graph.py ├── nodes.py ├── prompts.py ├── state.py └── tools.py ├── pdf2markdown ├── README.md └── pdf2markdown.py ├── ppo_from_scratch ├── ppo.png └── ppo_train.py ├── rag_demo ├── medical_data.txt └── rag.ipynb ├── reinforce++ ├── data_process.ipynb └── train_reinforce++.py ├── remax ├── data_process.ipynb └── train_remax.py ├── rloo ├── data_process.ipynb └── train_rloo.py ├── s1_from_scratch ├── README.md ├── generate.py ├── s1_train.py └── training_loss.png ├── table_extract ├── README.md ├── chinese_cht.ttf ├── imgs │ ├── 2.jpg │ └── extract_2.png └── table2txt.ipynb ├── table_rag.ipynb ├── train_llm_from_scratch ├── README.md ├── dataset.py ├── dpo.png ├── dpo_train.py ├── screenshot-20241207-093824.png ├── sft_train.py ├── test_llm.ipynb ├── tokenizer │ ├── merges.txt │ ├── tokenizer.json │ ├── tokenizer_config.json │ └── vocab.json ├── train.ipynb ├── train.py ├── train_tokenizer.ipynb ├── trainer_state_pretrain.json └── trainer_state_sft.json ├── train_moe_from_scratch ├── 1.ipynb ├── README.md ├── dataset.py ├── moe_sft_train.py ├── moe_test.py ├── moe_train.py ├── screenshot-20241207-093824.png ├── sft.jsonl ├── tokenizer │ ├── merges.txt │ ├── tokenizer.json │ ├── tokenizer_config.json │ └── vocab.json └── train.jsonl ├── train_multimodal_from_scratch ├── README.md ├── gradio_vlm.py ├── sft_train.py ├── sft_train_multi_images.py ├── test.ipynb ├── test.py ├── test_multi_images.py ├── train.py └── trainer.ipynb ├── train_qwen3_next_from_scratch ├── README.md ├── dataset.py ├── moe_test.py ├── pretrain.jpg ├── pretrain.py ├── sft.jpg ├── sft_train.py ├── test_moe.ipynb └── tokenizer │ ├── merges.txt │ ├── tokenizer.json │ ├── tokenizer_config.json │ └── vocab.json ├── train_siglip_from_scratch ├── clip.png ├── data_process.ipynb ├── dataset.py ├── model.py ├── siglip.png ├── test.ipynb ├── train.py └── 伪代码.png └── training-free_grpo ├── .env ├── compress.py ├── prompts.py └── train.py /README.md: -------------------------------------------------------------------------------- 1 | # 复现各种大模型相关算法 2 | -------------------------------------------------------------------------------- /all_embd_to_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/all_embd_to_openai.py -------------------------------------------------------------------------------- /all_to_tool_call/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/all_to_tool_call/README.md -------------------------------------------------------------------------------- /all_to_tool_call/all_to_tool_call.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/all_to_tool_call/all_to_tool_call.py -------------------------------------------------------------------------------- /all_to_tool_call/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/all_to_tool_call/test.ipynb -------------------------------------------------------------------------------- /code-r1/.gemini/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.gemini/config.yaml -------------------------------------------------------------------------------- /code-r1/.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/CODEOWNERS -------------------------------------------------------------------------------- /code-r1/.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /code-r1/.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/dependabot.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/check-pr-title.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/check-pr-title.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/checkpoint_converter.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/checkpoint_converter.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/cpu_unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/cpu_unit_tests.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/disabled/e2e_prime.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/disabled/e2e_prime.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/doc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/doc.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_ascend.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_ascend.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_dapo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_dapo.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_eval_aime24.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_eval_aime24.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_genrm_remote.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_genrm_remote.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_ppo_trainer.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_ppo_trainer.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_ppo_trainer_megatron_sglang.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_ppo_trainer_megatron_sglang.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_ppo_trainer_megatron_vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_ppo_trainer_megatron_vllm.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_sft.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_sft.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_spin.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_spin.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/e2e_sppo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/e2e_sppo.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/gpu_unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/gpu_unit_tests.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/model.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/pre-commit-full.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/pre-commit-full.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/sanity.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/sanity.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/scorecard.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/secrets_scan.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/secrets_scan.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/sgl.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/sgl.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/type-coverage-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/type-coverage-check.yml -------------------------------------------------------------------------------- /code-r1/.github/workflows/vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.github/workflows/vllm.yml -------------------------------------------------------------------------------- /code-r1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.gitignore -------------------------------------------------------------------------------- /code-r1/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.pre-commit-config.yaml -------------------------------------------------------------------------------- /code-r1/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/.readthedocs.yaml -------------------------------------------------------------------------------- /code-r1/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/CONTRIBUTING.md -------------------------------------------------------------------------------- /code-r1/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/LICENSE -------------------------------------------------------------------------------- /code-r1/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /code-r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/README.md -------------------------------------------------------------------------------- /code-r1/code-r1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/code-r1.png -------------------------------------------------------------------------------- /code-r1/data/data_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/data/data_process.ipynb -------------------------------------------------------------------------------- /code-r1/docker/Apptainerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Apptainerfile.rocm -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.extention.awsefa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.extention.awsefa -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.ngc.vllm -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.ngc.vllm0.8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.ngc.vllm0.8 -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.ngc.vllm0.8.sagemaker: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.ngc.vllm0.8.sagemaker -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.rocm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.rocm -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.sglang: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.sglang -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.vemlp.vllm.te -------------------------------------------------------------------------------- /code-r1/docker/Dockerfile.vllm.sglang.megatron.deepseek: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/Dockerfile.vllm.sglang.megatron.deepseek -------------------------------------------------------------------------------- /code-r1/docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/README.md -------------------------------------------------------------------------------- /code-r1/docker/verl0.4-cu124-torch2.6-fa2.7.4/Dockerfile.base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/verl0.4-cu124-torch2.6-fa2.7.4/Dockerfile.base -------------------------------------------------------------------------------- /code-r1/docker/verl0.4-cu124-torch2.6-fa2.7.4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/verl0.4-cu124-torch2.6-fa2.7.4/README.md -------------------------------------------------------------------------------- /code-r1/docker/verl0.5-cu126-torch2.7.1-fa2.8.0/Dockerfile.base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/verl0.5-cu126-torch2.7.1-fa2.8.0/Dockerfile.base -------------------------------------------------------------------------------- /code-r1/docker/verl0.5-cu126-torch2.7.1-fa2.8.0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/verl0.5-cu126-torch2.7.1-fa2.8.0/README.md -------------------------------------------------------------------------------- /code-r1/docker/verl0.5-preview-cu128-torch2.7.1-fa2.8.0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docker/verl0.5-preview-cu128-torch2.7.1-fa2.8.0/README.md -------------------------------------------------------------------------------- /code-r1/docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/Makefile -------------------------------------------------------------------------------- /code-r1/docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/README.md -------------------------------------------------------------------------------- /code-r1/docs/README_vllm0.7.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/README_vllm0.7.md -------------------------------------------------------------------------------- /code-r1/docs/README_vllm0.8.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/README_vllm0.8.md -------------------------------------------------------------------------------- /code-r1/docs/_static/js/runllm-widget.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/_static/js/runllm-widget.js -------------------------------------------------------------------------------- /code-r1/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/_static/logo.png -------------------------------------------------------------------------------- /code-r1/docs/advance/checkpoint.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/checkpoint.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/dpo_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/dpo_extension.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/fsdp_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/fsdp_extension.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/megatron_extension.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/placement.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/placement.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/ppo_lora.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/ppo_lora.rst -------------------------------------------------------------------------------- /code-r1/docs/advance/rope.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/advance/rope.rst -------------------------------------------------------------------------------- /code-r1/docs/algo/baseline.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/baseline.md -------------------------------------------------------------------------------- /code-r1/docs/algo/dapo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/dapo.md -------------------------------------------------------------------------------- /code-r1/docs/algo/entropy.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/entropy.md -------------------------------------------------------------------------------- /code-r1/docs/algo/gpg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/gpg.md -------------------------------------------------------------------------------- /code-r1/docs/algo/grpo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/grpo.md -------------------------------------------------------------------------------- /code-r1/docs/algo/opo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/opo.md -------------------------------------------------------------------------------- /code-r1/docs/algo/ppo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/ppo.md -------------------------------------------------------------------------------- /code-r1/docs/algo/spin.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/spin.md -------------------------------------------------------------------------------- /code-r1/docs/algo/sppo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/algo/sppo.md -------------------------------------------------------------------------------- /code-r1/docs/amd_tutorial/amd_build_dockerfile_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/amd_tutorial/amd_build_dockerfile_page.rst -------------------------------------------------------------------------------- /code-r1/docs/amd_tutorial/amd_vllm_page.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/amd_tutorial/amd_vllm_page.rst -------------------------------------------------------------------------------- /code-r1/docs/api/data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/api/data.rst -------------------------------------------------------------------------------- /code-r1/docs/api/single_controller.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/api/single_controller.rst -------------------------------------------------------------------------------- /code-r1/docs/api/trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/api/trainer.rst -------------------------------------------------------------------------------- /code-r1/docs/api/utils.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/api/utils.rst -------------------------------------------------------------------------------- /code-r1/docs/ascend_tutorial/ascend_quick_start.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/ascend_tutorial/ascend_quick_start.rst -------------------------------------------------------------------------------- /code-r1/docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/conf.py -------------------------------------------------------------------------------- /code-r1/docs/examples/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/examples/config.rst -------------------------------------------------------------------------------- /code-r1/docs/examples/gsm8k_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/examples/gsm8k_example.rst -------------------------------------------------------------------------------- /code-r1/docs/examples/multi_modal_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/examples/multi_modal_example.rst -------------------------------------------------------------------------------- /code-r1/docs/examples/ppo_code_architecture.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/examples/ppo_code_architecture.rst -------------------------------------------------------------------------------- /code-r1/docs/examples/sandbox_fusion_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/examples/sandbox_fusion_example.rst -------------------------------------------------------------------------------- /code-r1/docs/faq/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/faq/faq.rst -------------------------------------------------------------------------------- /code-r1/docs/hybrid_flow.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/hybrid_flow.rst -------------------------------------------------------------------------------- /code-r1/docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/index.rst -------------------------------------------------------------------------------- /code-r1/docs/perf/device_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/perf/device_tuning.rst -------------------------------------------------------------------------------- /code-r1/docs/perf/dpsk.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/perf/dpsk.md -------------------------------------------------------------------------------- /code-r1/docs/perf/nsight_profiling.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/perf/nsight_profiling.md -------------------------------------------------------------------------------- /code-r1/docs/perf/perf_tuning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/perf/perf_tuning.rst -------------------------------------------------------------------------------- /code-r1/docs/preparation/prepare_data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/preparation/prepare_data.rst -------------------------------------------------------------------------------- /code-r1/docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/preparation/reward_function.rst -------------------------------------------------------------------------------- /code-r1/docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/requirements-docs.txt -------------------------------------------------------------------------------- /code-r1/docs/sglang_multiturn/interaction_system.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/sglang_multiturn/interaction_system.rst -------------------------------------------------------------------------------- /code-r1/docs/sglang_multiturn/multiturn.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/sglang_multiturn/multiturn.rst -------------------------------------------------------------------------------- /code-r1/docs/sglang_multiturn/sandbox_fusion.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/sglang_multiturn/sandbox_fusion.rst -------------------------------------------------------------------------------- /code-r1/docs/sglang_multiturn/search_tool_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/sglang_multiturn/search_tool_example.rst -------------------------------------------------------------------------------- /code-r1/docs/single_controller.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/single_controller.rst -------------------------------------------------------------------------------- /code-r1/docs/start/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/start/install.rst -------------------------------------------------------------------------------- /code-r1/docs/start/more_resources.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/start/more_resources.rst -------------------------------------------------------------------------------- /code-r1/docs/start/multinode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/start/multinode.rst -------------------------------------------------------------------------------- /code-r1/docs/start/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/start/quickstart.rst -------------------------------------------------------------------------------- /code-r1/docs/start/ray_debug_tutorial.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/start/ray_debug_tutorial.rst -------------------------------------------------------------------------------- /code-r1/docs/workers/fsdp_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/workers/fsdp_workers.rst -------------------------------------------------------------------------------- /code-r1/docs/workers/megatron_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/workers/megatron_workers.rst -------------------------------------------------------------------------------- /code-r1/docs/workers/ray_trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/workers/ray_trainer.rst -------------------------------------------------------------------------------- /code-r1/docs/workers/sglang_worker.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/docs/workers/sglang_worker.rst -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/aime2024_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/aime2024_multiturn_w_tool.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/dapo_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/dapo_multiturn_w_tool.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/geo3k.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/geo3k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/geo3k_multiturn_w_tool.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/gsm8k_multiturn_w_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/gsm8k_multiturn_w_interaction.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/gsm8k_multiturn_w_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/gsm8k_multiturn_w_tool.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/multiturn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/multiturn.py -------------------------------------------------------------------------------- /code-r1/examples/data_preprocess/preprocess_search_r1_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/data_preprocess/preprocess_search_r1_dataset.py -------------------------------------------------------------------------------- /code-r1/examples/generation/run_deepseek7b_mutli_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/generation/run_deepseek7b_mutli_node.sh -------------------------------------------------------------------------------- /code-r1/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /code-r1/examples/gpg_trainer/gpg.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/gpg_trainer/gpg.md -------------------------------------------------------------------------------- /code-r1/examples/gpg_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/gpg_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /code-r1/examples/gpg_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/gpg_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/README.md -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_deepseek671b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_deepseek671b_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_deepseek7b_llm_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_deepseek7b_llm_math.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_deepseek7b_llm_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_minicpmo2_6.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_minicpmo2_6.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_moonlight16b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_moonlight16b_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2-7b_math.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2-7b_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2-7b_sgl_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5-3b_gsm8k_grpo_lora.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_32b_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_32b_grpo_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_7b_grpo_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b-megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b-megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b_lora.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl-7b_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl_32b_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl_3b_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen2_5_vl_7b_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen3-236b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen3-236b_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen3-8b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen3-8b.sh -------------------------------------------------------------------------------- /code-r1/examples/grpo_trainer/run_qwen3moe-30b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/grpo_trainer/run_qwen3moe-30b_megatron.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/README.md -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek7b_llm_modelscope.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek7b_llm_pfppo.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek7b_llm_sandbox_fusion.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_qwen2-7b_sglang_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /code-r1/examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /code-r1/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/remax_trainer/run_qwen2.5-3b_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/remax_trainer/run_qwen2.5-7b_seq_balance.sh -------------------------------------------------------------------------------- /code-r1/examples/rloo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/rloo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_deepseek_6b7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_deepseek_6b7.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_gemma_2b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_gemma_2b.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_gemma_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_gemma_7b.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_qwen2_5_05b_sft_peft_sp2_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_qwen2_5_05b_sft_peft_sp2_npu.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_qwen_05_peft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_qwen_05_peft.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/gsm8k/run_qwen_05_sp2_liger.sh -------------------------------------------------------------------------------- /code-r1/examples/sft/multiturn/run_qwen_05_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sft/multiturn/run_qwen_05_sp2.sh -------------------------------------------------------------------------------- /code-r1/examples/sglang_multiturn/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/sglang_multiturn/README.md -------------------------------------------------------------------------------- /code-r1/examples/slurm/ray_on_slurm.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/slurm/ray_on_slurm.slurm -------------------------------------------------------------------------------- /code-r1/examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/split_placement/README.md -------------------------------------------------------------------------------- /code-r1/examples/split_placement/config/ppo_trainer_split.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/split_placement/config/ppo_trainer_split.yaml -------------------------------------------------------------------------------- /code-r1/examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /code-r1/examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /code-r1/examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /code-r1/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/tuning/14b/qwen2_14b_grpo_4_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /code-r1/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/tuning/70b/qwen2-70b_grpo_32_h20_fsdp_vllm.sh -------------------------------------------------------------------------------- /code-r1/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/tuning/70b/qwen2-70b_grpo_32_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /code-r1/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/examples/tuning/7b/qwen2-7b_grpo_2_h800_fsdp_vllm.sh -------------------------------------------------------------------------------- /code-r1/my_reward/code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/my_reward/code.py -------------------------------------------------------------------------------- /code-r1/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/pyproject.toml -------------------------------------------------------------------------------- /code-r1/recipe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/README.md -------------------------------------------------------------------------------- /code-r1/recipe/char_count/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/char_count/README.md -------------------------------------------------------------------------------- /code-r1/recipe/char_count/create_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/char_count/create_dataset.py -------------------------------------------------------------------------------- /code-r1/recipe/char_count/reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/char_count/reward_function.py -------------------------------------------------------------------------------- /code-r1/recipe/char_count/train_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/char_count/train_grpo.sh -------------------------------------------------------------------------------- /code-r1/recipe/char_count/train_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/char_count/train_sft.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/README.md -------------------------------------------------------------------------------- /code-r1/recipe/dapo/config/dapo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/config/dapo_trainer.yaml -------------------------------------------------------------------------------- /code-r1/recipe/dapo/dapo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/dapo_ray_trainer.py -------------------------------------------------------------------------------- /code-r1/recipe/dapo/main_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/main_dapo.py -------------------------------------------------------------------------------- /code-r1/recipe/dapo/prepare_dapo_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/prepare_dapo_data.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/run_dapo_early_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/run_dapo_early_qwen2.5_32b.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/run_dapo_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/run_dapo_qwen2.5_32b.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/run_dapo_wo_ds_qwen2.5_32b.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/runtime_env.yaml -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_7b.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_7b_math.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_7b_math_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_7b_math_lora.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_7b_math_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_7b_math_megatron.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_dspk_671b_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_dspk_671b_megatron.sh -------------------------------------------------------------------------------- /code-r1/recipe/dapo/test_dapo_qwen3_30b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/dapo/test_dapo_qwen3_30b_math.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/32b_clip_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/32b_clip_cov.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/32b_kl_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/32b_kl_cov.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/32b_kl_cov_mininbsz.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/32b_kl_cov_mininbsz.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/7b_clip_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/7b_clip_cov.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/7b_kl_cov.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/7b_kl_cov.sh -------------------------------------------------------------------------------- /code-r1/recipe/entropy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/README.md -------------------------------------------------------------------------------- /code-r1/recipe/entropy/config/entropy_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/config/entropy_trainer.yaml -------------------------------------------------------------------------------- /code-r1/recipe/entropy/entropy_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/entropy_ray_trainer.py -------------------------------------------------------------------------------- /code-r1/recipe/entropy/main_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/main_entropy.py -------------------------------------------------------------------------------- /code-r1/recipe/entropy/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/reward.py -------------------------------------------------------------------------------- /code-r1/recipe/entropy/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/reward_score/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/entropy/reward_score/entropy_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/reward_score/entropy_math/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/entropy/reward_score/entropy_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/entropy/reward_score/entropy_math/grader.py -------------------------------------------------------------------------------- /code-r1/recipe/genrm_remote/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/genrm_remote/README.md -------------------------------------------------------------------------------- /code-r1/recipe/genrm_remote/reward_function.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/genrm_remote/reward_function.py -------------------------------------------------------------------------------- /code-r1/recipe/genrm_remote/run_genrm_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/genrm_remote/run_genrm_remote.sh -------------------------------------------------------------------------------- /code-r1/recipe/minicpmo/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/minicpmo/rl_dataset.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/config/prime_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/config/prime_trainer.yaml -------------------------------------------------------------------------------- /code-r1/recipe/prime/main_prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/main_prime.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/prime_core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/prime_core_algos.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/prime_dp_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/prime_dp_rm.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/prime_fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/prime_fsdp_workers.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/prime_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/prime_ray_trainer.py -------------------------------------------------------------------------------- /code-r1/recipe/prime/run_prime_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/run_prime_qwen.sh -------------------------------------------------------------------------------- /code-r1/recipe/prime/run_prime_qwen_code.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/prime/run_prime_qwen_code.sh -------------------------------------------------------------------------------- /code-r1/recipe/r1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/README.md -------------------------------------------------------------------------------- /code-r1/recipe/r1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/config/evaluation.yaml -------------------------------------------------------------------------------- /code-r1/recipe/r1/data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/data_process.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/main_eval.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/reward_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/reward_score.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/run_r1_distill_qwen.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/run_r1_distill_qwen.sh -------------------------------------------------------------------------------- /code-r1/recipe/r1/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/tasks/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/tasks/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/tasks/gpqa.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/tasks/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/tasks/livecodebench.py -------------------------------------------------------------------------------- /code-r1/recipe/r1/tasks/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/r1/tasks/math.py -------------------------------------------------------------------------------- /code-r1/recipe/retool/retool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/retool.py -------------------------------------------------------------------------------- /code-r1/recipe/retool/retool_multi_turn_sft_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/retool_multi_turn_sft_preprocess.py -------------------------------------------------------------------------------- /code-r1/recipe/retool/retool_sft_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/retool_sft_preprocess.py -------------------------------------------------------------------------------- /code-r1/recipe/retool/run_qwen2-32b_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/run_qwen2-32b_sft.sh -------------------------------------------------------------------------------- /code-r1/recipe/retool/run_qwen2.5_32b_sp8.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/run_qwen2.5_32b_sp8.sh -------------------------------------------------------------------------------- /code-r1/recipe/retool/run_qwen2.5_7b_sp4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/run_qwen2.5_7b_sp4.sh -------------------------------------------------------------------------------- /code-r1/recipe/retool/run_qwen3_4b_sp4.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/run_qwen3_4b_sp4.sh -------------------------------------------------------------------------------- /code-r1/recipe/retool/sandbox_fusion_tool_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/retool/sandbox_fusion_tool_config.yaml -------------------------------------------------------------------------------- /code-r1/recipe/spin/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/README.md -------------------------------------------------------------------------------- /code-r1/recipe/spin/config/spin_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/config/spin_trainer.yaml -------------------------------------------------------------------------------- /code-r1/recipe/spin/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/core_algos.py -------------------------------------------------------------------------------- /code-r1/recipe/spin/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/dp_actor.py -------------------------------------------------------------------------------- /code-r1/recipe/spin/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/fsdp_workers.py -------------------------------------------------------------------------------- /code-r1/recipe/spin/main_spin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/main_spin.py -------------------------------------------------------------------------------- /code-r1/recipe/spin/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/run_spin.sh -------------------------------------------------------------------------------- /code-r1/recipe/spin/spin_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/spin/spin_trainer.py -------------------------------------------------------------------------------- /code-r1/recipe/sppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/README.md -------------------------------------------------------------------------------- /code-r1/recipe/sppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/__init__.py -------------------------------------------------------------------------------- /code-r1/recipe/sppo/config/sppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/config/sppo_trainer.yaml -------------------------------------------------------------------------------- /code-r1/recipe/sppo/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/dp_actor.py -------------------------------------------------------------------------------- /code-r1/recipe/sppo/main_sppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/main_sppo.py -------------------------------------------------------------------------------- /code-r1/recipe/sppo/run_qwen2.5-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/run_qwen2.5-7b_rm.sh -------------------------------------------------------------------------------- /code-r1/recipe/sppo/sppo_ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/sppo_ray_trainer.py -------------------------------------------------------------------------------- /code-r1/recipe/sppo/sppo_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/recipe/sppo/sppo_worker.py -------------------------------------------------------------------------------- /code-r1/requirements-npu.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/requirements-npu.txt -------------------------------------------------------------------------------- /code-r1/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/requirements.txt -------------------------------------------------------------------------------- /code-r1/requirements_sglang.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/requirements_sglang.txt -------------------------------------------------------------------------------- /code-r1/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/__init__.py -------------------------------------------------------------------------------- /code-r1/scripts/converter_hf_to_mcore.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/converter_hf_to_mcore.py -------------------------------------------------------------------------------- /code-r1/scripts/diagnose.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/diagnose.py -------------------------------------------------------------------------------- /code-r1/scripts/init_random_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/init_random_model.py -------------------------------------------------------------------------------- /code-r1/scripts/install_vllm_sglang_mcore.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/install_vllm_sglang_mcore.sh -------------------------------------------------------------------------------- /code-r1/scripts/legacy_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/scripts/legacy_model_merger.py -------------------------------------------------------------------------------- /code-r1/search_r1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code-r1/search_r1/llm_agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code-r1/search_r1/llm_agent/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/llm_agent/generation.py -------------------------------------------------------------------------------- /code-r1/search_r1/llm_agent/tensor_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/llm_agent/tensor_helper.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/build_index.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/build_index.sh -------------------------------------------------------------------------------- /code-r1/search_r1/search/google_search_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/google_search_server.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/index_builder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/index_builder.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/rerank_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/rerank_server.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/retrieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/retrieval.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/retrieval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/retrieval.sh -------------------------------------------------------------------------------- /code-r1/search_r1/search/retrieval_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/retrieval_request.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/retrieval_rerank_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/retrieval_rerank_server.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/retrieval_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/retrieval_server.py -------------------------------------------------------------------------------- /code-r1/search_r1/search/serp_search_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/search_r1/search/serp_search_server.py -------------------------------------------------------------------------------- /code-r1/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/setup.py -------------------------------------------------------------------------------- /code-r1/tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/README.md -------------------------------------------------------------------------------- /code-r1/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/experimental/agent_loop/agent_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/experimental/agent_loop/agent_utils.py -------------------------------------------------------------------------------- /code-r1/tests/experimental/agent_loop/test_basic_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/experimental/agent_loop/test_basic_agent_loop.py -------------------------------------------------------------------------------- /code-r1/tests/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/interactions/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/interactions/test_gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/interactions/test_gsm8k_interaction.py -------------------------------------------------------------------------------- /code-r1/tests/interactions/test_interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/interactions/test_interaction_registry.py -------------------------------------------------------------------------------- /code-r1/tests/kill_github_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/kill_github_tests.sh -------------------------------------------------------------------------------- /code-r1/tests/models/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/models/test_transformer.py -------------------------------------------------------------------------------- /code-r1/tests/models/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/models/test_transformers_ulysses.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/base/test_decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/base/test_decorator.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/check_worker_alive/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/check_worker_alive/main.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/detached_worker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/detached_worker/README.md -------------------------------------------------------------------------------- /code-r1/tests/single_controller/detached_worker/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/detached_worker/client.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/detached_worker/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/detached_worker/run.sh -------------------------------------------------------------------------------- /code-r1/tests/single_controller/detached_worker/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/detached_worker/server.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_auto_padding_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_auto_padding_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_colocated_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_colocated_workers.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_colocated_workers_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_colocated_workers_fused.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_data_transfer.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_decorator_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_decorator_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_driverfunc_to_worker.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_fused_workers_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_fused_workers_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_ray_collectives.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_ray_collectives.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_ray_local_envs_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_ray_local_envs_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_ray_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_ray_utils_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_rvdz.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_worker_group_basics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_worker_group_basics.py -------------------------------------------------------------------------------- /code-r1/tests/single_controller/test_worker_group_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/single_controller/test_worker_group_torch.py -------------------------------------------------------------------------------- /code-r1/tests/special_distributed/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_distributed/README.md -------------------------------------------------------------------------------- /code-r1/tests/special_distributed/run_all.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_distributed/run_all.sh -------------------------------------------------------------------------------- /code-r1/tests/special_distributed/test_fsdp_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_distributed/test_fsdp_ckpt.py -------------------------------------------------------------------------------- /code-r1/tests/special_distributed/test_tensor_dict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_distributed/test_tensor_dict.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/README.md -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/check_custom_rwd_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/check_custom_rwd_fn.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/check_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/check_results.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/envs/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/envs/digit_completion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/envs/digit_completion/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/envs/digit_completion/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/envs/digit_completion/task.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/envs/digit_completion/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/envs/digit_completion/tokenizer.py -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/generation/run_gen_qwen05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/generation/run_gen_qwen05.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/ppo_trainer/run_function_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/ppo_trainer/run_function_reward.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/ppo_trainer/run_model_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/ppo_trainer/run_model_reward.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/ppo_trainer/run_single_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/ppo_trainer/run_single_gpu.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_dapo.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_genrm_remote.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_genrm_remote.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_grpo_lora_with_merge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_grpo_lora_with_merge.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_ppo_trainer_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_ppo_trainer_megatron.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_prime.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_prime.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_r1_distill_qwen_aime24_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_r1_distill_qwen_aime24_eval.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_spin.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_spin.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_sppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_sppo.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/run_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/run_test.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/sft/run_sft.sh -------------------------------------------------------------------------------- /code-r1/tests/special_e2e/sft/test_sp_loss_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_e2e/sft/test_sp_loss_match.py -------------------------------------------------------------------------------- /code-r1/tests/special_npu/run_qwen2_5_05b_dapo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_npu/run_qwen2_5_05b_dapo.sh -------------------------------------------------------------------------------- /code-r1/tests/special_npu/run_qwen2_5_05b_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_npu/run_qwen2_5_05b_grpo.sh -------------------------------------------------------------------------------- /code-r1/tests/special_npu/run_qwen2_5_05b_sft_peft_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_npu/run_qwen2_5_05b_sft_peft_sp2.sh -------------------------------------------------------------------------------- /code-r1/tests/special_npu/run_qwen2_5_vl_3b_npu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_npu/run_qwen2_5_vl_3b_npu.sh -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_api_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_api_docs.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_device_api_usage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_device_api_usage.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_docs_time_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_docs_time_info.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_license.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_pr_description.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_pr_description.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/check_pr_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/check_pr_title.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/test_config_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/test_config_docs.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/test_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/test_import.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/type_coverage_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/type_coverage_check.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/validate_imported_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/validate_imported_docs.py -------------------------------------------------------------------------------- /code-r1/tests/special_sanity/validate_structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_sanity/validate_structure.py -------------------------------------------------------------------------------- /code-r1/tests/special_standalone/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_standalone/README.md -------------------------------------------------------------------------------- /code-r1/tests/special_standalone/test_memory_buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/special_standalone/test_memory_buffers.py -------------------------------------------------------------------------------- /code-r1/tests/test_base_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/test_base_config_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/test_protocol_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/test_protocol_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/tools/test_base_tool_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/tools/test_base_tool_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/config/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/config/test_algo_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/config/test_algo_config_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/ppo/test_core_algos_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/ppo/test_core_algos_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/trainer/ppo/test_metric_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/trainer/ppo/test_metric_utils_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/_test_module.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/_test_module.py -------------------------------------------------------------------------------- /code-r1/tests/utils/ckpt/test_esi_save_ckpt_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/ckpt/test_esi_save_ckpt_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/dataset/test_create_rl_sampler_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/dataset/test_create_rl_sampler_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/dataset/test_rl_dataset_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/dataset/test_rl_dataset_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/dataset/test_sft_dataset_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/dataset/test_sft_dataset_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/megatron/test_pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/megatron/test_pipeline_parallel.py -------------------------------------------------------------------------------- /code-r1/tests/utils/reward_score/test_sandbox_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/reward_score/test_sandbox_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_activation_offload.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_config_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_config_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_flops_counter.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_fs_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_fs_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_import_utils_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_import_utils_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_linear_cross_entropy.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_linear_cross_entropy_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_linear_cross_entropy_tp.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_model_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_model_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_nvtx_profile.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_seqlen_balancing.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_timeout_decorator_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_timeout_decorator_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/utils/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/utils/test_torch_functional.py -------------------------------------------------------------------------------- /code-r1/tests/workers/reward_manager/test_registry_on_cpu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/reward_manager/test_registry_on_cpu.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/async_rollout_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/async_rollout_utils.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/perf/vllm_async_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/perf/vllm_async_rollout.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/rollout_vllm/run_fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/rollout_vllm/run_fsdp_vllm.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/rollout_vllm/test_vllm_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/rollout_vllm/test_vllm_spmd.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/test_async_sglang_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/test_async_sglang_server.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/test_hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/test_hf_rollout.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/test_sglang_multi_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/test_sglang_multi_interaction.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/test_sglang_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/test_sglang_spmd.py -------------------------------------------------------------------------------- /code-r1/tests/workers/rollout/utils_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/tests/workers/rollout/utils_sglang.py -------------------------------------------------------------------------------- /code-r1/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/base_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/base_config.py -------------------------------------------------------------------------------- /code-r1/verl/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/experimental/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/experimental/agent_loop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/experimental/agent_loop/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/experimental/agent_loop/agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/experimental/agent_loop/agent_loop.py -------------------------------------------------------------------------------- /code-r1/verl/experimental/agent_loop/single_turn_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/experimental/agent_loop/single_turn_agent_loop.py -------------------------------------------------------------------------------- /code-r1/verl/experimental/agent_loop/tool_agent_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/experimental/agent_loop/tool_agent_loop.py -------------------------------------------------------------------------------- /code-r1/verl/interactions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/interactions/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/interactions/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/interactions/base.py -------------------------------------------------------------------------------- /code-r1/verl/interactions/gsm8k_interaction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/interactions/gsm8k_interaction.py -------------------------------------------------------------------------------- /code-r1/verl/interactions/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/interactions/utils/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/interactions/utils/interaction_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/interactions/utils/interaction_registry.py -------------------------------------------------------------------------------- /code-r1/verl/model_merger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/model_merger/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/model_merger/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/model_merger/__main__.py -------------------------------------------------------------------------------- /code-r1/verl/model_merger/base_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/model_merger/base_model_merger.py -------------------------------------------------------------------------------- /code-r1/verl/model_merger/fsdp_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/model_merger/fsdp_model_merger.py -------------------------------------------------------------------------------- /code-r1/verl/model_merger/megatron_model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/model_merger/megatron_model_merger.py -------------------------------------------------------------------------------- /code-r1/verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/README.md -------------------------------------------------------------------------------- /code-r1/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /code-r1/verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/config_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/config_converter.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/loader.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/mbridge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/mbridge.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/model_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/model_forward.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/model_initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/model_initializer.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/patch_v012.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/patch_v012.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/attention.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/model.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/rope_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/rope_utils.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/vision_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/vision_config.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/qwen2_5_vl/vision_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/qwen2_5_vl/vision_model.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/readme.md -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/registry.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/saver.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/util.py -------------------------------------------------------------------------------- /code-r1/verl/models/mcore/weight_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/mcore/weight_converter.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /code-r1/verl/models/qwen2/megatron/modeling_qwen2_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/qwen2/megatron/modeling_qwen2_megatron.py -------------------------------------------------------------------------------- /code-r1/verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/registry.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/dense_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/dense_common.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/kimi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/kimi_vl.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/npu_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/npu_patch.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/qwen2_5_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/qwen2_5_vl.py -------------------------------------------------------------------------------- /code-r1/verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /code-r1/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /code-r1/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/protocol.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /code-r1/verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /code-r1/verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/third_party/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/third_party/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/third_party/sglang/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/third_party/sglang/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/third_party/sglang/parallel_state.py -------------------------------------------------------------------------------- /code-r1/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/tools/base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/base_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/geo3k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/geo3k_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/gsm8k_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/gsm8k_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/mcp_base_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/mcp_base_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/mcp_search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/mcp_search_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/sandbox_fusion_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/sandbox_fusion_tools.py -------------------------------------------------------------------------------- /code-r1/verl/tools/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/schemas.py -------------------------------------------------------------------------------- /code-r1/verl/tools/search_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/search_tool.py -------------------------------------------------------------------------------- /code-r1/verl/tools/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/utils/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/tools/utils/mcp_clients/McpClientManager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/utils/mcp_clients/McpClientManager.py -------------------------------------------------------------------------------- /code-r1/verl/tools/utils/mcp_clients/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/utils/mcp_clients/utils.py -------------------------------------------------------------------------------- /code-r1/verl/tools/utils/search_r1_like_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/utils/search_r1_like_utils.py -------------------------------------------------------------------------------- /code-r1/verl/tools/utils/tool_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/tools/utils/tool_registry.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/algorithm.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/grpo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/grpo_trainer.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /code-r1/verl/trainer/constants_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/constants_ppo.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/ppo/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/ppo/metric_utils.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/ppo/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/trainer/ppo/reward.py -------------------------------------------------------------------------------- /code-r1/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- 1 | working_dir: ./ 2 | excludes: ["/.git/"] 3 | env_vars: 4 | TORCH_NCCL_AVOID_RECORD_STREAMS: "1" 5 | -------------------------------------------------------------------------------- /code-r1/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/activation_offload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/activation_offload.py -------------------------------------------------------------------------------- /code-r1/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /code-r1/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /code-r1/verl/utils/checkpoint/megatron_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/checkpoint/megatron_checkpoint_manager.py -------------------------------------------------------------------------------- /code-r1/verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/config.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/multiturn_sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/multiturn_sft_dataset.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/sampler.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /code-r1/verl/utils/dataset/vision_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/dataset/vision_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /code-r1/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /code-r1/verl/utils/device.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/device.py -------------------------------------------------------------------------------- /code-r1/verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/distributed.py -------------------------------------------------------------------------------- /code-r1/verl/utils/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/experimental/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/experimental/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/experimental/torch_functional.py -------------------------------------------------------------------------------- /code-r1/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /code-r1/verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/fs.py -------------------------------------------------------------------------------- /code-r1/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /code-r1/verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/import_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/kernel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/kernel/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/kernel/kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/kernel/kernels.py -------------------------------------------------------------------------------- /code-r1/verl/utils/kernel/linear_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/kernel/linear_cross_entropy.py -------------------------------------------------------------------------------- /code-r1/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /code-r1/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/dist_checkpointing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/dist_checkpointing.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /code-r1/verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /code-r1/verl/utils/metric/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/metric/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/metric/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/metric/utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/model.py -------------------------------------------------------------------------------- /code-r1/verl/utils/net_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/net_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/config.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/empty_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/empty_annotations.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/nvtx_profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/nvtx_profile.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/performance.py -------------------------------------------------------------------------------- /code-r1/verl/utils/profiler/profile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/profiler/profile.py -------------------------------------------------------------------------------- /code-r1/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/py_functional.py -------------------------------------------------------------------------------- /code-r1/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/geo3k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/geo3k.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/math_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/math_batch.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/math_dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/math_dapo.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/math_verify.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_code/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_code/README.md -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_code/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_code/testing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_code/testing_util.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_code/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_code/utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_math/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_math/grader.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/prime_math/math_normalize.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/sandbox_fusion/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/sandbox_fusion/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/sandbox_fusion/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/sandbox_fusion/utils.py -------------------------------------------------------------------------------- /code-r1/verl/utils/reward_score/search_r1_like_qa_em.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/reward_score/search_r1_like_qa_em.py -------------------------------------------------------------------------------- /code-r1/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /code-r1/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /code-r1/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /code-r1/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /code-r1/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/tracking.py -------------------------------------------------------------------------------- /code-r1/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/ulysses.py -------------------------------------------------------------------------------- /code-r1/verl/utils/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/utils/vllm_utils.py -------------------------------------------------------------------------------- /code-r1/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.4.1.dev 2 | -------------------------------------------------------------------------------- /code-r1/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/actor/base.py -------------------------------------------------------------------------------- /code-r1/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /code-r1/verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /code-r1/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/critic/base.py -------------------------------------------------------------------------------- /code-r1/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /code-r1/verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /code-r1/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /code-r1/verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/batch.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/dapo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/dapo.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/naive.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/prime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/prime.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_manager/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_manager/registry.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/async_server.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/chat_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/chat_scheduler.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/schemas.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/schemas.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/sglang_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/sglang_rollout/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/sglang_rollout/sglang_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/sglang_rollout/sglang_rollout.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/sglang_rollout/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/sglang_rollout/utils.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/vllm_rollout/vllm_async_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/vllm_rollout/vllm_async_server.py -------------------------------------------------------------------------------- /code-r1/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/fsdp_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/fsdp_sglang.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/megatron_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/megatron_sglang.py -------------------------------------------------------------------------------- /code-r1/verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/code-r1/verl/workers/sharding_manager/megatron_vllm.py -------------------------------------------------------------------------------- /dapo_from_scratch/dapo_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/dapo_from_scratch/dapo_loss.png -------------------------------------------------------------------------------- /dapo_from_scratch/grpo_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/dapo_from_scratch/grpo_loss.png -------------------------------------------------------------------------------- /dapo_from_scratch/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/dapo_from_scratch/reward_func.py -------------------------------------------------------------------------------- /dapo_from_scratch/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/dapo_from_scratch/test.py -------------------------------------------------------------------------------- /dapo_from_scratch/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/dapo_from_scratch/train.py -------------------------------------------------------------------------------- /date_modify.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/date_modify.ipynb -------------------------------------------------------------------------------- /deep_research/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/client.py -------------------------------------------------------------------------------- /deep_research/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/prompts.py -------------------------------------------------------------------------------- /deep_research/search_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/search_mcp.py -------------------------------------------------------------------------------- /deep_research/searxng/docker-compose.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/searxng/docker-compose.yaml -------------------------------------------------------------------------------- /deep_research/searxng/settings.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/searxng/settings.yml -------------------------------------------------------------------------------- /deep_research/test.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/test.log -------------------------------------------------------------------------------- /deep_research/test.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deep_research/test.md -------------------------------------------------------------------------------- /deepseek_learn/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/1.png -------------------------------------------------------------------------------- /deepseek_learn/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/2.png -------------------------------------------------------------------------------- /deepseek_learn/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/3.png -------------------------------------------------------------------------------- /deepseek_learn/MLA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/MLA.py -------------------------------------------------------------------------------- /deepseek_learn/MTP_train/MTP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/MTP_train/MTP.py -------------------------------------------------------------------------------- /deepseek_learn/MTP_train/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/MTP_train/loss.png -------------------------------------------------------------------------------- /deepseek_learn/MTP_train/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/MTP_train/test.py -------------------------------------------------------------------------------- /deepseek_learn/deepseek_r1_train/deepseek-r1解读.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/deepseek_r1_train/deepseek-r1解读.md -------------------------------------------------------------------------------- /deepseek_learn/deepseek_r1_train/deepseek_r1_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/deepseek_r1_train/deepseek_r1_train.py -------------------------------------------------------------------------------- /deepseek_learn/deepseek_r1_train/deepseek_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/deepseek_r1_train/deepseek_reward.png -------------------------------------------------------------------------------- /deepseek_learn/dsa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/README.md -------------------------------------------------------------------------------- /deepseek_learn/dsa/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/dataset.py -------------------------------------------------------------------------------- /deepseek_learn/dsa/get_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/get_data.py -------------------------------------------------------------------------------- /deepseek_learn/dsa/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/model.py -------------------------------------------------------------------------------- /deepseek_learn/dsa/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/train.py -------------------------------------------------------------------------------- /deepseek_learn/dsa/warmup_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/dsa/warmup_train.py -------------------------------------------------------------------------------- /deepseek_learn/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/deepseek_learn/test.ipynb -------------------------------------------------------------------------------- /gradio_mcp_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/gradio_mcp_client.py -------------------------------------------------------------------------------- /grpo_from_scratch/grpo_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/grpo_from_scratch/grpo_loss.png -------------------------------------------------------------------------------- /grpo_from_scratch/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/grpo_from_scratch/reward_func.py -------------------------------------------------------------------------------- /grpo_from_scratch/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/grpo_from_scratch/test.py -------------------------------------------------------------------------------- /grpo_from_scratch/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/grpo_from_scratch/train.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/README.md -------------------------------------------------------------------------------- /knowledge_distillation_embedding/data_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/data_process.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/dataset.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/evaluation.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/get_distillation_data_local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/get_distillation_data_local.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/merge.py -------------------------------------------------------------------------------- /knowledge_distillation_embedding/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_embedding/train.py -------------------------------------------------------------------------------- /knowledge_distillation_llm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/README.md -------------------------------------------------------------------------------- /knowledge_distillation_llm/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/dataset.py -------------------------------------------------------------------------------- /knowledge_distillation_llm/images/fkl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/images/fkl.png -------------------------------------------------------------------------------- /knowledge_distillation_llm/images/fkl_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/images/fkl_formula.png -------------------------------------------------------------------------------- /knowledge_distillation_llm/images/rkl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/images/rkl.png -------------------------------------------------------------------------------- /knowledge_distillation_llm/images/rkl_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/images/rkl_formula.png -------------------------------------------------------------------------------- /knowledge_distillation_llm/on_policy_distillation_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/on_policy_distillation_train.py -------------------------------------------------------------------------------- /knowledge_distillation_llm/on_policy_distillation_train_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/on_policy_distillation_train_rl.py -------------------------------------------------------------------------------- /knowledge_distillation_llm/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/train.py -------------------------------------------------------------------------------- /knowledge_distillation_llm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm/utils.py -------------------------------------------------------------------------------- /knowledge_distillation_llm_cross_tokenizer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm_cross_tokenizer/README.md -------------------------------------------------------------------------------- /knowledge_distillation_llm_cross_tokenizer/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm_cross_tokenizer/dataset.py -------------------------------------------------------------------------------- /knowledge_distillation_llm_cross_tokenizer/example.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm_cross_tokenizer/example.json -------------------------------------------------------------------------------- /knowledge_distillation_llm_cross_tokenizer/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm_cross_tokenizer/train.py -------------------------------------------------------------------------------- /knowledge_distillation_llm_cross_tokenizer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/knowledge_distillation_llm_cross_tokenizer/utils.py -------------------------------------------------------------------------------- /langgraph_agent/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/README.md -------------------------------------------------------------------------------- /langgraph_agent/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/graph.py -------------------------------------------------------------------------------- /langgraph_agent/nodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/nodes.py -------------------------------------------------------------------------------- /langgraph_agent/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/prompts.py -------------------------------------------------------------------------------- /langgraph_agent/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/state.py -------------------------------------------------------------------------------- /langgraph_agent/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/langgraph_agent/tools.py -------------------------------------------------------------------------------- /pdf2markdown/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/pdf2markdown/README.md -------------------------------------------------------------------------------- /pdf2markdown/pdf2markdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/pdf2markdown/pdf2markdown.py -------------------------------------------------------------------------------- /ppo_from_scratch/ppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/ppo_from_scratch/ppo.png -------------------------------------------------------------------------------- /ppo_from_scratch/ppo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/ppo_from_scratch/ppo_train.py -------------------------------------------------------------------------------- /rag_demo/medical_data.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/rag_demo/medical_data.txt -------------------------------------------------------------------------------- /rag_demo/rag.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/rag_demo/rag.ipynb -------------------------------------------------------------------------------- /reinforce++/data_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/reinforce++/data_process.ipynb -------------------------------------------------------------------------------- /reinforce++/train_reinforce++.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/reinforce++/train_reinforce++.py -------------------------------------------------------------------------------- /remax/data_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/remax/data_process.ipynb -------------------------------------------------------------------------------- /remax/train_remax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/remax/train_remax.py -------------------------------------------------------------------------------- /rloo/data_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/rloo/data_process.ipynb -------------------------------------------------------------------------------- /rloo/train_rloo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/rloo/train_rloo.py -------------------------------------------------------------------------------- /s1_from_scratch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/s1_from_scratch/README.md -------------------------------------------------------------------------------- /s1_from_scratch/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/s1_from_scratch/generate.py -------------------------------------------------------------------------------- /s1_from_scratch/s1_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/s1_from_scratch/s1_train.py -------------------------------------------------------------------------------- /s1_from_scratch/training_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/s1_from_scratch/training_loss.png -------------------------------------------------------------------------------- /table_extract/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_extract/README.md -------------------------------------------------------------------------------- /table_extract/chinese_cht.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_extract/chinese_cht.ttf -------------------------------------------------------------------------------- /table_extract/imgs/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_extract/imgs/2.jpg -------------------------------------------------------------------------------- /table_extract/imgs/extract_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_extract/imgs/extract_2.png -------------------------------------------------------------------------------- /table_extract/table2txt.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_extract/table2txt.ipynb -------------------------------------------------------------------------------- /table_rag.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/table_rag.ipynb -------------------------------------------------------------------------------- /train_llm_from_scratch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/README.md -------------------------------------------------------------------------------- /train_llm_from_scratch/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/dataset.py -------------------------------------------------------------------------------- /train_llm_from_scratch/dpo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/dpo.png -------------------------------------------------------------------------------- /train_llm_from_scratch/dpo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/dpo_train.py -------------------------------------------------------------------------------- /train_llm_from_scratch/screenshot-20241207-093824.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/screenshot-20241207-093824.png -------------------------------------------------------------------------------- /train_llm_from_scratch/sft_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/sft_train.py -------------------------------------------------------------------------------- /train_llm_from_scratch/test_llm.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/test_llm.ipynb -------------------------------------------------------------------------------- /train_llm_from_scratch/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/tokenizer/merges.txt -------------------------------------------------------------------------------- /train_llm_from_scratch/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /train_llm_from_scratch/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /train_llm_from_scratch/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/tokenizer/vocab.json -------------------------------------------------------------------------------- /train_llm_from_scratch/train.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/train.ipynb -------------------------------------------------------------------------------- /train_llm_from_scratch/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/train.py -------------------------------------------------------------------------------- /train_llm_from_scratch/train_tokenizer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/train_tokenizer.ipynb -------------------------------------------------------------------------------- /train_llm_from_scratch/trainer_state_pretrain.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/trainer_state_pretrain.json -------------------------------------------------------------------------------- /train_llm_from_scratch/trainer_state_sft.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_llm_from_scratch/trainer_state_sft.json -------------------------------------------------------------------------------- /train_moe_from_scratch/1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/1.ipynb -------------------------------------------------------------------------------- /train_moe_from_scratch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/README.md -------------------------------------------------------------------------------- /train_moe_from_scratch/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/dataset.py -------------------------------------------------------------------------------- /train_moe_from_scratch/moe_sft_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/moe_sft_train.py -------------------------------------------------------------------------------- /train_moe_from_scratch/moe_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/moe_test.py -------------------------------------------------------------------------------- /train_moe_from_scratch/moe_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/moe_train.py -------------------------------------------------------------------------------- /train_moe_from_scratch/screenshot-20241207-093824.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/screenshot-20241207-093824.png -------------------------------------------------------------------------------- /train_moe_from_scratch/sft.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/sft.jsonl -------------------------------------------------------------------------------- /train_moe_from_scratch/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/tokenizer/merges.txt -------------------------------------------------------------------------------- /train_moe_from_scratch/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /train_moe_from_scratch/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /train_moe_from_scratch/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/tokenizer/vocab.json -------------------------------------------------------------------------------- /train_moe_from_scratch/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_moe_from_scratch/train.jsonl -------------------------------------------------------------------------------- /train_multimodal_from_scratch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/README.md -------------------------------------------------------------------------------- /train_multimodal_from_scratch/gradio_vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/gradio_vlm.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/sft_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/sft_train.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/sft_train_multi_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/sft_train_multi_images.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/test.ipynb -------------------------------------------------------------------------------- /train_multimodal_from_scratch/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/test.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/test_multi_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/test_multi_images.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/train.py -------------------------------------------------------------------------------- /train_multimodal_from_scratch/trainer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_multimodal_from_scratch/trainer.ipynb -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/README.md -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/dataset.py -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/moe_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/moe_test.py -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/pretrain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/pretrain.jpg -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/pretrain.py -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/sft.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/sft.jpg -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/sft_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/sft_train.py -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/test_moe.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/test_moe.ipynb -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/tokenizer/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/tokenizer/merges.txt -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /train_qwen3_next_from_scratch/tokenizer/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_qwen3_next_from_scratch/tokenizer/vocab.json -------------------------------------------------------------------------------- /train_siglip_from_scratch/clip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/clip.png -------------------------------------------------------------------------------- /train_siglip_from_scratch/data_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/data_process.ipynb -------------------------------------------------------------------------------- /train_siglip_from_scratch/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/dataset.py -------------------------------------------------------------------------------- /train_siglip_from_scratch/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/model.py -------------------------------------------------------------------------------- /train_siglip_from_scratch/siglip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/siglip.png -------------------------------------------------------------------------------- /train_siglip_from_scratch/test.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/test.ipynb -------------------------------------------------------------------------------- /train_siglip_from_scratch/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/train.py -------------------------------------------------------------------------------- /train_siglip_from_scratch/伪代码.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/train_siglip_from_scratch/伪代码.png -------------------------------------------------------------------------------- /training-free_grpo/.env: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/training-free_grpo/.env -------------------------------------------------------------------------------- /training-free_grpo/compress.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/training-free_grpo/compress.py -------------------------------------------------------------------------------- /training-free_grpo/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/training-free_grpo/prompts.py -------------------------------------------------------------------------------- /training-free_grpo/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wyf3/llm_related/HEAD/training-free_grpo/train.py --------------------------------------------------------------------------------