├── .gitignore ├── LICENSE ├── README.md ├── data ├── AIME24 │ └── test.json ├── AIME25 │ └── test.json ├── AMC23 │ └── test.json ├── GPQA │ └── test.json ├── MATH-500 │ └── test.json ├── Minerva │ └── test.json ├── Olympiad-Bench │ └── test.json ├── prepare_sft.py ├── prepare_train.py ├── prepare_train_sft_rl.py └── preprocess.py ├── exp_scripts ├── train.sh ├── train_llama.sh ├── train_luffy.sh └── train_srft.sh ├── figs ├── hpt_pseudo_code.png ├── post-training_algorithms_table.png ├── results1.png ├── results2.png └── upge_overview.png └── hpt ├── LICENSE ├── deepscaler ├── __init__.py ├── globals.py ├── rewards │ ├── __init__.py │ ├── math_reward.py │ ├── math_utils │ │ ├── __init__.py │ │ └── utils.py │ └── reward_types.py ├── system_prompts.py └── utils.py ├── requirements.txt ├── scripts ├── ablations │ ├── run_deepscaler_1.5b_2k.sh │ └── run_deepscaler_1.5b_4k.sh ├── data │ ├── deepscaler_dataset.py │ ├── prepare_filter_dataset.py │ ├── prepare_openr1_data.py │ ├── prepare_openr1_data_spec_sys.py │ └── prepare_openr1_data_v6.py ├── eval │ └── eval_model.sh └── train │ ├── run_deepscaler_1.5b_16k.sh │ ├── run_deepscaler_1.5b_24k.sh │ └── run_deepscaler_1.5b_8k.sh ├── setup.py ├── source.sh ├── test.py └── verl ├── LICENSE ├── Notice.txt ├── README.md ├── docker ├── Dockerfile.ngc.vllm └── Dockerfile.vemlp.vllm.te ├── docs ├── Makefile ├── README.md ├── _static │ └── logo.png ├── advance │ ├── dpo_extension.rst │ ├── fsdp_extension.rst │ ├── megatron_extension.rst │ └── placement.rst ├── conf.py ├── examples │ ├── config.rst │ ├── gsm8k_example.rst │ └── ppo_code_architecture.rst ├── experiment │ └── ppo.rst ├── faq │ └── faq.rst ├── index.rst ├── preparation │ ├── prepare_data.rst │ └── reward_function.rst ├── requirements-docs.txt ├── start │ ├── install.rst │ └── quickstart.rst └── workers │ ├── fsdp_workers.rst │ ├── megatron_workers.rst │ └── ray_trainer.rst ├── examples ├── data_preprocess │ ├── full_hh_rlhf.py │ ├── gsm8k.py │ ├── hellaswag.py │ └── math_dataset.py ├── generation │ └── run_deepseek_v2_lite_math.sh ├── grpo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_seq_balance.sh │ ├── run_qwen2-7b.sh │ └── run_qwen2-7b_seq_balance.sh ├── ppo_trainer │ ├── run_deepseek7b_llm.sh │ ├── run_deepseek7b_llm_sp2.sh │ ├── run_deepseek_full_hh_rlhf.sh │ ├── run_deepseek_math_gsm8k_megatron.sh │ ├── run_deepseek_megatron.sh │ ├── run_gemma.sh │ ├── run_qwen2-7b.sh │ ├── run_qwen2-7b_rm.sh │ ├── run_qwen2-7b_rm_seq_balance.sh │ ├── run_qwen2-7b_seq_balance.sh │ ├── run_qwen2.5-32b.sh │ └── verl_getting_started.ipynb ├── ray │ └── tutorial.ipynb └── split_placement │ ├── README.md │ ├── main_ppo_split.py │ ├── run_deepseek7b_llm.sh │ └── split_monkey_patch.py ├── patches └── megatron_v4.patch ├── pyproject.toml ├── requirements.txt ├── scripts └── format.sh ├── setup.py ├── tests ├── __init__.py ├── e2e │ ├── __init__.py │ ├── check_results.py │ ├── run_qwen_gsm8k_function_rm.sh │ ├── run_qwen_gsm8k_function_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm.sh │ ├── run_qwen_gsm8k_model_rm_no_rmpad.sh │ ├── run_qwen_gsm8k_model_rm_seq_balance.sh │ ├── run_qwen_gsm8k_model_rm_ulysses.sh │ ├── run_ray_trainer.sh │ └── run_ray_trainer_rmpad.sh ├── gpu_utility │ ├── test_memory_buffers.py │ ├── test_ops.py │ └── test_torch_functional.py ├── model │ ├── test_transformer.py │ └── test_transformers_ulysses.py ├── ray │ ├── test_check_worker_alive.py │ ├── test_colocated_workers.py │ ├── test_data_transfer.py │ ├── test_driverfunc_to_worker.py │ ├── test_high_level_scheduling_api.py │ ├── test_ray_local_envs.py │ ├── test_rvdz.py │ ├── test_worker_group_basics.py │ └── test_worker_group_torch.py ├── rollout │ ├── run_fsdp_vllm.py │ └── test_vllm_hf_loader.py ├── sanity │ ├── check_license.py │ └── test_import.py ├── sft │ └── run_sft.sh └── utility │ └── test_tensor_dict_utilities.py └── verl ├── __init__.py ├── mix_src ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── mix_ppo_trainer.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── entropy_math │ ├── __init__.py │ ├── grader.py │ └── math_normalize.py ├── main_mix_ppo.py ├── main_ppo_new_reward.py ├── math_verify_reward.py ├── mix_actor.py ├── mix_core_alg.py ├── mix_fsdp_worker.py ├── mix_trainer.py ├── mix_trainer_acc_rebatch.py ├── mix_vllm_rollout.py ├── prime_math │ ├── __init__.py │ ├── grader.py │ └── math_normalize.py ├── reward_with_format.py ├── rl_dataset_with_target.py └── test.py ├── models ├── README.md ├── __init__.py ├── llama │ ├── __init__.py │ └── megatron │ │ ├── __init__.py │ │ ├── checkpoint_utils │ │ ├── __init__.py │ │ ├── llama_loader.py │ │ └── llama_saver.py │ │ ├── layers │ │ ├── __init__.py │ │ ├── parallel_attention.py │ │ ├── parallel_decoder.py │ │ ├── parallel_linear.py │ │ ├── parallel_mlp.py │ │ └── parallel_rmsnorm.py │ │ └── modeling_llama_megatron.py ├── registry.py ├── transformers │ ├── __init__.py │ ├── llama.py │ ├── monkey_patch.py │ └── qwen2.py └── weight_loader_registry.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── megatron │ │ ├── __init__.py │ │ ├── worker.py │ │ └── worker_group.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py ├── ray │ ├── __init__.py │ ├── base.py │ └── megatron.py └── version │ └── version ├── third_party ├── __init__.py └── vllm │ ├── __init__.py │ ├── vllm_v_0_3_1 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── tokenizer.py │ ├── weight_loaders.py │ └── worker.py │ ├── vllm_v_0_4_2 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ ├── vllm_v_0_5_4 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py │ └── vllm_v_0_6_3 │ ├── __init__.py │ ├── arg_utils.py │ ├── config.py │ ├── dtensor_weight_loaders.py │ ├── hf_weight_loader.py │ ├── llm.py │ ├── llm_engine_sp.py │ ├── megatron_weight_loaders.py │ ├── model_loader.py │ ├── model_runner.py │ ├── parallel_state.py │ ├── spmd_gpu_executor.py │ ├── tokenizer.py │ └── worker.py ├── trainer ├── __init__.py ├── config │ ├── evaluation.yaml │ ├── generation.yaml │ ├── ppo_megatron_trainer.yaml │ ├── ppo_trainer.yaml │ └── sft_trainer.yaml ├── fsdp_sft_trainer.py ├── main_eval.py ├── main_generation.py ├── main_ppo.py ├── ppo │ ├── __init__.py │ ├── core_algos.py │ └── ray_trainer.py └── runtime_env.yaml ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ └── fsdp_checkpoint_manager.py ├── config.py ├── dataset │ ├── README.md │ ├── __init__.py │ ├── rl_dataset.py │ ├── rm_dataset.py │ └── sft_dataset.py ├── debug │ ├── __init__.py │ ├── performance.py │ └── trajectory_tracker.py ├── distributed.py ├── flops_counter.py ├── fs.py ├── fsdp_utils.py ├── hdfs_io.py ├── import_utils.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── logging_utils.py ├── megatron │ ├── __init__.py │ ├── memory.py │ ├── optimizer.py │ ├── optimizer_config.py │ ├── pipeline_parallel.py │ ├── sequence_parallel.py │ └── tensor_parallel.py ├── megatron_utils.py ├── memory_buffer.py ├── model.py ├── py_functional.py ├── ray_utils.py ├── rendezvous │ ├── __init__.py │ └── ray_backend.py ├── reward_score │ ├── __init__.py │ ├── gsm8k.py │ └── math.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py ├── version └── version └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── dp_actor.py └── megatron_actor.py ├── critic ├── __init__.py ├── base.py ├── dp_critic.py └── megatron_critic.py ├── fsdp_workers.py ├── megatron_workers.py ├── reward_model ├── __init__.py ├── base.py └── megatron │ ├── __init__.py │ └── reward_model.py ├── rollout ├── __init__.py ├── base.py ├── hf_rollout.py ├── naive │ ├── __init__.py │ └── naive_rollout.py ├── tokenizer.py └── vllm_rollout │ ├── __init__.py │ └── vllm_rollout.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_ulysses.py ├── fsdp_vllm.py └── megatron_vllm.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/README.md -------------------------------------------------------------------------------- /data/AIME24/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/AIME24/test.json -------------------------------------------------------------------------------- /data/AIME25/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/AIME25/test.json -------------------------------------------------------------------------------- /data/AMC23/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/AMC23/test.json -------------------------------------------------------------------------------- /data/GPQA/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/GPQA/test.json -------------------------------------------------------------------------------- /data/MATH-500/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/MATH-500/test.json -------------------------------------------------------------------------------- /data/Minerva/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/Minerva/test.json -------------------------------------------------------------------------------- /data/Olympiad-Bench/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/Olympiad-Bench/test.json -------------------------------------------------------------------------------- /data/prepare_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/prepare_sft.py -------------------------------------------------------------------------------- /data/prepare_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/prepare_train.py -------------------------------------------------------------------------------- /data/prepare_train_sft_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/prepare_train_sft_rl.py -------------------------------------------------------------------------------- /data/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/data/preprocess.py -------------------------------------------------------------------------------- /exp_scripts/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/exp_scripts/train.sh -------------------------------------------------------------------------------- /exp_scripts/train_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/exp_scripts/train_llama.sh -------------------------------------------------------------------------------- /exp_scripts/train_luffy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/exp_scripts/train_luffy.sh -------------------------------------------------------------------------------- /exp_scripts/train_srft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/exp_scripts/train_srft.sh -------------------------------------------------------------------------------- /figs/hpt_pseudo_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/figs/hpt_pseudo_code.png -------------------------------------------------------------------------------- /figs/post-training_algorithms_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/figs/post-training_algorithms_table.png -------------------------------------------------------------------------------- /figs/results1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/figs/results1.png -------------------------------------------------------------------------------- /figs/results2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/figs/results2.png -------------------------------------------------------------------------------- /figs/upge_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/figs/upge_overview.png -------------------------------------------------------------------------------- /hpt/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/LICENSE -------------------------------------------------------------------------------- /hpt/deepscaler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hpt/deepscaler/globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/globals.py -------------------------------------------------------------------------------- /hpt/deepscaler/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/rewards/__init__.py -------------------------------------------------------------------------------- /hpt/deepscaler/rewards/math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/rewards/math_reward.py -------------------------------------------------------------------------------- /hpt/deepscaler/rewards/math_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/rewards/math_utils/__init__.py -------------------------------------------------------------------------------- /hpt/deepscaler/rewards/math_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/rewards/math_utils/utils.py -------------------------------------------------------------------------------- /hpt/deepscaler/rewards/reward_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/rewards/reward_types.py -------------------------------------------------------------------------------- /hpt/deepscaler/system_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/system_prompts.py -------------------------------------------------------------------------------- /hpt/deepscaler/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/deepscaler/utils.py -------------------------------------------------------------------------------- /hpt/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/requirements.txt -------------------------------------------------------------------------------- /hpt/scripts/ablations/run_deepscaler_1.5b_2k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/ablations/run_deepscaler_1.5b_2k.sh -------------------------------------------------------------------------------- /hpt/scripts/ablations/run_deepscaler_1.5b_4k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/ablations/run_deepscaler_1.5b_4k.sh -------------------------------------------------------------------------------- /hpt/scripts/data/deepscaler_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/data/deepscaler_dataset.py -------------------------------------------------------------------------------- /hpt/scripts/data/prepare_filter_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/data/prepare_filter_dataset.py -------------------------------------------------------------------------------- /hpt/scripts/data/prepare_openr1_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/data/prepare_openr1_data.py -------------------------------------------------------------------------------- /hpt/scripts/data/prepare_openr1_data_spec_sys.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/data/prepare_openr1_data_spec_sys.py -------------------------------------------------------------------------------- /hpt/scripts/data/prepare_openr1_data_v6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/data/prepare_openr1_data_v6.py -------------------------------------------------------------------------------- /hpt/scripts/eval/eval_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/eval/eval_model.sh -------------------------------------------------------------------------------- /hpt/scripts/train/run_deepscaler_1.5b_16k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/train/run_deepscaler_1.5b_16k.sh -------------------------------------------------------------------------------- /hpt/scripts/train/run_deepscaler_1.5b_24k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/train/run_deepscaler_1.5b_24k.sh -------------------------------------------------------------------------------- /hpt/scripts/train/run_deepscaler_1.5b_8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/scripts/train/run_deepscaler_1.5b_8k.sh -------------------------------------------------------------------------------- /hpt/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/setup.py -------------------------------------------------------------------------------- /hpt/source.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/source.sh -------------------------------------------------------------------------------- /hpt/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/test.py -------------------------------------------------------------------------------- /hpt/verl/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/LICENSE -------------------------------------------------------------------------------- /hpt/verl/Notice.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023-2024 Bytedance Ltd. and/or its affiliates -------------------------------------------------------------------------------- /hpt/verl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/README.md -------------------------------------------------------------------------------- /hpt/verl/docker/Dockerfile.ngc.vllm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docker/Dockerfile.ngc.vllm -------------------------------------------------------------------------------- /hpt/verl/docker/Dockerfile.vemlp.vllm.te: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docker/Dockerfile.vemlp.vllm.te -------------------------------------------------------------------------------- /hpt/verl/docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/Makefile -------------------------------------------------------------------------------- /hpt/verl/docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/README.md -------------------------------------------------------------------------------- /hpt/verl/docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/_static/logo.png -------------------------------------------------------------------------------- /hpt/verl/docs/advance/dpo_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/advance/dpo_extension.rst -------------------------------------------------------------------------------- /hpt/verl/docs/advance/fsdp_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/advance/fsdp_extension.rst -------------------------------------------------------------------------------- /hpt/verl/docs/advance/megatron_extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/advance/megatron_extension.rst -------------------------------------------------------------------------------- /hpt/verl/docs/advance/placement.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/advance/placement.rst -------------------------------------------------------------------------------- /hpt/verl/docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/conf.py -------------------------------------------------------------------------------- /hpt/verl/docs/examples/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/examples/config.rst -------------------------------------------------------------------------------- /hpt/verl/docs/examples/gsm8k_example.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/examples/gsm8k_example.rst -------------------------------------------------------------------------------- /hpt/verl/docs/examples/ppo_code_architecture.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/examples/ppo_code_architecture.rst -------------------------------------------------------------------------------- /hpt/verl/docs/experiment/ppo.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/experiment/ppo.rst -------------------------------------------------------------------------------- /hpt/verl/docs/faq/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/faq/faq.rst -------------------------------------------------------------------------------- /hpt/verl/docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/index.rst -------------------------------------------------------------------------------- /hpt/verl/docs/preparation/prepare_data.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/preparation/prepare_data.rst -------------------------------------------------------------------------------- /hpt/verl/docs/preparation/reward_function.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/preparation/reward_function.rst -------------------------------------------------------------------------------- /hpt/verl/docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/requirements-docs.txt -------------------------------------------------------------------------------- /hpt/verl/docs/start/install.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/start/install.rst -------------------------------------------------------------------------------- /hpt/verl/docs/start/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/start/quickstart.rst -------------------------------------------------------------------------------- /hpt/verl/docs/workers/fsdp_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/workers/fsdp_workers.rst -------------------------------------------------------------------------------- /hpt/verl/docs/workers/megatron_workers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/workers/megatron_workers.rst -------------------------------------------------------------------------------- /hpt/verl/docs/workers/ray_trainer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/docs/workers/ray_trainer.rst -------------------------------------------------------------------------------- /hpt/verl/examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /hpt/verl/examples/data_preprocess/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/data_preprocess/gsm8k.py -------------------------------------------------------------------------------- /hpt/verl/examples/data_preprocess/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/data_preprocess/hellaswag.py -------------------------------------------------------------------------------- /hpt/verl/examples/data_preprocess/math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/data_preprocess/math_dataset.py -------------------------------------------------------------------------------- /hpt/verl/examples/generation/run_deepseek_v2_lite_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/generation/run_deepseek_v2_lite_math.sh -------------------------------------------------------------------------------- /hpt/verl/examples/grpo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/grpo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /hpt/verl/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/grpo_trainer/run_deepseek7b_llm_seq_balance.sh -------------------------------------------------------------------------------- /hpt/verl/examples/grpo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/grpo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /hpt/verl/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/grpo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_deepseek7b_llm_sp2.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_deepseek_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_deepseek_megatron.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_gemma.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_qwen2-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_qwen2-7b.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_qwen2-7b_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_qwen2-7b_rm.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_qwen2-7b_rm_seq_balance.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_qwen2-7b_seq_balance.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/run_qwen2.5-32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/run_qwen2.5-32b.sh -------------------------------------------------------------------------------- /hpt/verl/examples/ppo_trainer/verl_getting_started.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ppo_trainer/verl_getting_started.ipynb -------------------------------------------------------------------------------- /hpt/verl/examples/ray/tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/ray/tutorial.ipynb -------------------------------------------------------------------------------- /hpt/verl/examples/split_placement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/split_placement/README.md -------------------------------------------------------------------------------- /hpt/verl/examples/split_placement/main_ppo_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/split_placement/main_ppo_split.py -------------------------------------------------------------------------------- /hpt/verl/examples/split_placement/run_deepseek7b_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/split_placement/run_deepseek7b_llm.sh -------------------------------------------------------------------------------- /hpt/verl/examples/split_placement/split_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/examples/split_placement/split_monkey_patch.py -------------------------------------------------------------------------------- /hpt/verl/patches/megatron_v4.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/patches/megatron_v4.patch -------------------------------------------------------------------------------- /hpt/verl/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/pyproject.toml -------------------------------------------------------------------------------- /hpt/verl/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/requirements.txt -------------------------------------------------------------------------------- /hpt/verl/scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/scripts/format.sh -------------------------------------------------------------------------------- /hpt/verl/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/setup.py -------------------------------------------------------------------------------- /hpt/verl/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/__init__.py -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/__init__.py -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/check_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/check_results.py -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_function_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_function_rm.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_ray_trainer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_ray_trainer.sh -------------------------------------------------------------------------------- /hpt/verl/tests/e2e/run_ray_trainer_rmpad.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/e2e/run_ray_trainer_rmpad.sh -------------------------------------------------------------------------------- /hpt/verl/tests/gpu_utility/test_memory_buffers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/gpu_utility/test_memory_buffers.py -------------------------------------------------------------------------------- /hpt/verl/tests/gpu_utility/test_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/gpu_utility/test_ops.py -------------------------------------------------------------------------------- /hpt/verl/tests/gpu_utility/test_torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/gpu_utility/test_torch_functional.py -------------------------------------------------------------------------------- /hpt/verl/tests/model/test_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/model/test_transformer.py -------------------------------------------------------------------------------- /hpt/verl/tests/model/test_transformers_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/model/test_transformers_ulysses.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_check_worker_alive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_check_worker_alive.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_colocated_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_colocated_workers.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_data_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_data_transfer.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_driverfunc_to_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_driverfunc_to_worker.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_high_level_scheduling_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_high_level_scheduling_api.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_ray_local_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_ray_local_envs.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_rvdz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_rvdz.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_worker_group_basics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_worker_group_basics.py -------------------------------------------------------------------------------- /hpt/verl/tests/ray/test_worker_group_torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/ray/test_worker_group_torch.py -------------------------------------------------------------------------------- /hpt/verl/tests/rollout/run_fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/rollout/run_fsdp_vllm.py -------------------------------------------------------------------------------- /hpt/verl/tests/rollout/test_vllm_hf_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/rollout/test_vllm_hf_loader.py -------------------------------------------------------------------------------- /hpt/verl/tests/sanity/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/sanity/check_license.py -------------------------------------------------------------------------------- /hpt/verl/tests/sanity/test_import.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/sanity/test_import.py -------------------------------------------------------------------------------- /hpt/verl/tests/sft/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/sft/run_sft.sh -------------------------------------------------------------------------------- /hpt/verl/tests/utility/test_tensor_dict_utilities.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/tests/utility/test_tensor_dict_utilities.py -------------------------------------------------------------------------------- /hpt/verl/verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/evaluation.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/generation.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/mix_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/mix_ppo_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/config/sft_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/entropy_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/entropy_math/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/entropy_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/entropy_math/grader.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/entropy_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/entropy_math/math_normalize.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/main_mix_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/main_mix_ppo.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/main_ppo_new_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/main_ppo_new_reward.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/math_verify_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/math_verify_reward.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_actor.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_core_alg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_core_alg.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_fsdp_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_fsdp_worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_trainer.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_trainer_acc_rebatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_trainer_acc_rebatch.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/mix_vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/mix_vllm_rollout.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/prime_math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/prime_math/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/prime_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/prime_math/grader.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/prime_math/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/prime_math/math_normalize.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/reward_with_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/reward_with_format.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/rl_dataset_with_target.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/rl_dataset_with_target.py -------------------------------------------------------------------------------- /hpt/verl/verl/mix_src/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/mix_src/test.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/README.md -------------------------------------------------------------------------------- /hpt/verl/verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/parallel_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/parallel_attention.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/parallel_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/parallel_decoder.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/parallel_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/parallel_linear.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/parallel_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/parallel_mlp.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/llama/megatron/modeling_llama_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/llama/megatron/modeling_llama_megatron.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/registry.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/transformers/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/transformers/llama.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/transformers/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/transformers/monkey_patch.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/transformers/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/transformers/qwen2.py -------------------------------------------------------------------------------- /hpt/verl/verl/models/weight_loader_registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/models/weight_loader_registry.py -------------------------------------------------------------------------------- /hpt/verl/verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/protocol.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/megatron/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/megatron/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/megatron/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/megatron/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/megatron/worker_group.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/ray/megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/single_controller/ray/megatron.py -------------------------------------------------------------------------------- /hpt/verl/verl/single_controller/version/version: -------------------------------------------------------------------------------- 1 | 0.0.2 -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/config.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/llm.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_3_1/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/config.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/llm.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_4_2/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/config.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/llm.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_5_4/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/config.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/llm.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/third_party/vllm/vllm_v_0_6_3/worker.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/config/evaluation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/config/evaluation.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/config/generation.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/config/generation.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/config/ppo_megatron_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/config/ppo_megatron_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/config/ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/config/ppo_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/config/sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/config/sft_trainer.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/fsdp_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/fsdp_sft_trainer.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/main_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/main_eval.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/main_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/main_generation.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/main_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/main_ppo.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/ppo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/ppo/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/ppo/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/ppo/core_algos.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/ppo/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/ppo/ray_trainer.py -------------------------------------------------------------------------------- /hpt/verl/verl/trainer/runtime_env.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/trainer/runtime_env.yaml -------------------------------------------------------------------------------- /hpt/verl/verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hpt/verl/verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/config.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/dataset/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/dataset/README.md -------------------------------------------------------------------------------- /hpt/verl/verl/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/dataset/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/dataset/rl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/dataset/rl_dataset.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/dataset/rm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/dataset/rm_dataset.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/dataset/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/dataset/sft_dataset.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/debug/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/debug/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/debug/performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/debug/performance.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/debug/trajectory_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/debug/trajectory_tracker.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/distributed.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/fs.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/hdfs_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/hdfs_io.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/import_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/logging_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/memory.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/optimizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/optimizer_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/optimizer_config.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/pipeline_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/pipeline_parallel.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/sequence_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/sequence_parallel.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron/tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron/tensor_parallel.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/megatron_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/megatron_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/memory_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/memory_buffer.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/model.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/py_functional.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/ray_utils.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/rendezvous/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/rendezvous/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/rendezvous/ray_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/rendezvous/ray_backend.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/reward_score/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/reward_score/gsm8k.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/tracking.py -------------------------------------------------------------------------------- /hpt/verl/verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/utils/ulysses.py -------------------------------------------------------------------------------- /hpt/verl/verl/version/version: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /hpt/verl/verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/actor/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/actor/megatron_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/actor/megatron_actor.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/critic/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/critic/megatron_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/critic/megatron_critic.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/megatron_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/megatron_workers.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/reward_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/reward_model/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/reward_model/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/reward_model/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/reward_model/megatron/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/reward_model/megatron/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/reward_model/megatron/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/reward_model/megatron/reward_model.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/hf_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/hf_rollout.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/naive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/naive/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/naive/naive_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/naive/naive_rollout.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/tokenizer.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/rollout/vllm_rollout/vllm_rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/rollout/vllm_rollout/vllm_rollout.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/sharding_manager/fsdp_vllm.py -------------------------------------------------------------------------------- /hpt/verl/verl/workers/sharding_manager/megatron_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TsinghuaC3I/Unify-Post-Training/HEAD/hpt/verl/verl/workers/sharding_manager/megatron_vllm.py --------------------------------------------------------------------------------