├── .gitignore
├── .readthedocs.yaml
├── .style.yapf
├── LICENSE
├── Notice.txt
├── Readme.md
├── main_GRPO.sh
├── main_TRPA.sh
├── patches
    └── megatron_v4.patch
├── pyproject.toml
├── requirements.txt
├── scripts
    ├── data_preprocess
    │   └── kk_data_process.py
    └── eval
    │   ├── eval_with_generation.sh
    │   └── eval_with_generation_math.sh
├── setup.py
├── tests
    ├── __init__.py
    ├── e2e
    │   ├── __init__.py
    │   ├── arithmetic_sequence
    │   │   ├── data
    │   │   │   ├── create_dataset.py
    │   │   │   ├── test.parquet
    │   │   │   └── train.parquet
    │   │   ├── model
    │   │   │   ├── config.json
    │   │   │   ├── create_model_tokenizer.py
    │   │   │   ├── generation_config.json
    │   │   │   ├── model.safetensors
    │   │   │   └── tokenizer_config.json
    │   │   └── rl
    │   │   │   ├── README.md
    │   │   │   ├── config
    │   │   │       └── ray_trainer.yaml
    │   │   │   └── main_trainer.py
    │   ├── check_results.py
    │   ├── envs
    │   │   ├── __init__.py
    │   │   └── digit_completion
    │   │   │   ├── __init__.py
    │   │   │   ├── task.py
    │   │   │   └── tokenizer.py
    │   ├── run_qwen_gsm8k_function_rm.sh
    │   ├── run_qwen_gsm8k_function_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_model_rm.sh
    │   ├── run_qwen_gsm8k_model_rm_no_rmpad.sh
    │   ├── run_qwen_gsm8k_model_rm_seq_balance.sh
    │   ├── run_qwen_gsm8k_model_rm_ulysses.sh
    │   ├── run_ray_trainer.sh
    │   └── run_ray_trainer_rmpad.sh
    ├── gpu_utility
    │   ├── test_memory_buffers.py
    │   ├── test_ops.py
    │   └── test_torch_functional.py
    ├── model
    │   ├── test_transformer.py
    │   └── test_transformers_ulysses.py
    ├── ray
    │   ├── check_worker_alive
    │   │   └── main.py
    │   ├── detached_worker
    │   │   ├── README.md
    │   │   ├── client.py
    │   │   ├── run.sh
    │   │   └── server.py
    │   ├── test_check_worker_alive.py
    │   ├── test_colocated_workers.py
    │   ├── test_data_transfer.py
    │   ├── test_driverfunc_to_worker.py
    │   ├── test_high_level_scheduling_api.py
    │   ├── test_ray_local_envs.py
    │   ├── test_rvdz.py
    │   ├── test_worker_group_basics.py
    │   └── test_worker_group_torch.py
    ├── rollout
    │   ├── run_fsdp_vllm.py
    │   └── test_vllm_hf_loader.py
    ├── sanity
    │   ├── check_license.py
    │   └── test_import.py
    ├── utility
    │   └── test_tensor_dict_utilities.py
    └── verl
    │   └── utils
    │       └── dataset
    │           ├── test_rl_dataset.py
    │           ├── test_rm_dataset.py
    │           └── test_sft_dataset.py
└── verl
    ├── __init__.py
    ├── models
        ├── README.md
        ├── __init__.py
        ├── llama
        │   ├── __init__.py
        │   └── megatron
        │   │   ├── __init__.py
        │   │   ├── checkpoint_utils
        │   │       ├── __init__.py
        │   │       ├── llama_loader.py
        │   │       └── llama_saver.py
        │   │   ├── layers
        │   │       ├── __init__.py
        │   │       ├── parallel_attention.py
        │   │       ├── parallel_decoder.py
        │   │       ├── parallel_linear.py
        │   │       ├── parallel_mlp.py
        │   │       └── parallel_rmsnorm.py
        │   │   └── modeling_llama_megatron.py
        ├── registry.py
        ├── transformers
        │   ├── __init__.py
        │   ├── llama.py
        │   ├── monkey_patch.py
        │   └── qwen2.py
        └── weight_loader_registry.py
    ├── protocol.py
    ├── single_controller
        ├── __init__.py
        ├── base
        │   ├── __init__.py
        │   ├── decorator.py
        │   ├── megatron
        │   │   ├── __init__.py
        │   │   ├── worker.py
        │   │   └── worker_group.py
        │   ├── register_center
        │   │   ├── __init__.py
        │   │   └── ray.py
        │   ├── worker.py
        │   └── worker_group.py
        ├── ray
        │   ├── __init__.py
        │   ├── base.py
        │   └── megatron.py
        └── version
        │   └── version
    ├── third_party
        ├── __init__.py
        └── vllm
        │   ├── __init__.py
        │   ├── vllm_v_0_3_1
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── tokenizer.py
        │       ├── weight_loaders.py
        │       └── worker.py
        │   ├── vllm_v_0_4_2
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   ├── vllm_v_0_5_4
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
        │   └── vllm_v_0_6_3
        │       ├── __init__.py
        │       ├── arg_utils.py
        │       ├── config.py
        │       ├── dtensor_weight_loaders.py
        │       ├── hf_weight_loader.py
        │       ├── llm.py
        │       ├── llm_engine_sp.py
        │       ├── megatron_weight_loaders.py
        │       ├── model_loader.py
        │       ├── model_runner.py
        │       ├── parallel_state.py
        │       ├── spmd_gpu_executor.py
        │       ├── tokenizer.py
        │       └── worker.py
    ├── trainer
        ├── __init__.py
        ├── config
        │   ├── eval_with_generation.yaml
        │   ├── evaluation.yaml
        │   ├── generation.yaml
        │   ├── ppo_megatron_trainer.yaml
        │   ├── ppo_trainer.yaml
        │   └── sft_trainer.yaml
        ├── fsdp_sft_trainer.py
        ├── main_eval.py
        ├── main_eval_with_generation.py
        ├── main_generation.py
        ├── main_ppo.py
        ├── ppo
        │   ├── __init__.py
        │   ├── core_algos.py
        │   └── ray_trainer.py
        └── runtime_env.yaml
    ├── utils
        ├── __init__.py
        ├── config.py
        ├── dataset
        │   ├── README.md
        │   ├── __init__.py
        │   ├── rl_dataset.py
        │   ├── rm_dataset.py
        │   └── sft_dataset.py
        ├── debug
        │   ├── __init__.py
        │   ├── performance.py
        │   └── trajectory_tracker.py
        ├── distributed.py
        ├── flops_counter.py
        ├── fs.py
        ├── fsdp_utils.py
        ├── hdfs_io.py
        ├── import_utils.py
        ├── logger
        │   ├── __init__.py
        │   └── aggregate_logger.py
        ├── logging_utils.py
        ├── megatron
        │   ├── __init__.py
        │   ├── memory.py
        │   ├── optimizer.py
        │   ├── optimizer_config.py
        │   ├── pipeline_parallel.py
        │   ├── sequence_parallel.py
        │   └── tensor_parallel.py
        ├── megatron_utils.py
        ├── memory_buffer.py
        ├── model.py
        ├── py_functional.py
        ├── ray_utils.py
        ├── rendezvous
        │   ├── __init__.py
        │   └── ray_backend.py
        ├── reward_score
        │   ├── __init__.py
        │   ├── countdown.py
        │   ├── deepscaler
        │   │   ├── globals.py
        │   │   ├── rewards
        │   │   │   ├── __init__.py
        │   │   │   ├── math_reward.py
        │   │   │   ├── math_reward_pl.py
        │   │   │   ├── math_utils
        │   │   │   │   ├── __init__.py
        │   │   │   │   └── utils.py
        │   │   │   └── reward_types.py
        │   │   ├── system_prompts.py
        │   │   └── utils.py
        │   ├── gsm8k.py
        │   ├── kk.py
        │   ├── math.py
        │   └── multiply.py
        ├── seqlen_balancing.py
        ├── tokenizer.py
        ├── torch_dtypes.py
        ├── torch_functional.py
        ├── tracking.py
        └── ulysses.py
    ├── version
        └── version
    └── workers
        ├── __init__.py
        ├── actor
            ├── __init__.py
            ├── base.py
            ├── dp_actor.py
            └── megatron_actor.py
        ├── critic
            ├── __init__.py
            ├── base.py
            ├── dp_critic.py
            └── megatron_critic.py
        ├── fsdp_workers.py
        ├── megatron_workers.py
        ├── reward_model
            ├── __init__.py
            ├── base.py
            └── megatron
            │   ├── __init__.py
            │   └── reward_model.py
        ├── rollout
            ├── __init__.py
            ├── base.py
            ├── hf_rollout.py
            ├── naive
            │   ├── __init__.py
            │   └── naive_rollout.py
            ├── tokenizer.py
            └── vllm_rollout
            │   ├── __init__.py
            │   └── vllm_rollout.py
        └── sharding_manager
            ├── __init__.py
            ├── base.py
            ├── fsdp_ulysses.py
            ├── fsdp_vllm.py
            └── megatron_vllm.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/.gitignore


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/.style.yapf


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/LICENSE


--------------------------------------------------------------------------------
/Notice.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/Readme.md


--------------------------------------------------------------------------------
/main_GRPO.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/main_GRPO.sh


--------------------------------------------------------------------------------
/main_TRPA.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/main_TRPA.sh


--------------------------------------------------------------------------------
/patches/megatron_v4.patch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/patches/megatron_v4.patch


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/requirements.txt


--------------------------------------------------------------------------------
/scripts/data_preprocess/kk_data_process.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/scripts/data_preprocess/kk_data_process.py


--------------------------------------------------------------------------------
/scripts/eval/eval_with_generation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/scripts/eval/eval_with_generation.sh


--------------------------------------------------------------------------------
/scripts/eval/eval_with_generation_math.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/scripts/eval/eval_with_generation_math.sh


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/setup.py


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/create_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/data/create_dataset.py


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/test.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/data/test.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/data/train.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/data/train.parquet


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/model/config.json


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/model/create_model_tokenizer.py


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/generation_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/model/generation_config.json


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/model.safetensors:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/model/model.safetensors


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/model/tokenizer_config.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/model/tokenizer_config.json


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/rl/README.md


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/config/ray_trainer.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/rl/config/ray_trainer.yaml


--------------------------------------------------------------------------------
/tests/e2e/arithmetic_sequence/rl/main_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/arithmetic_sequence/rl/main_trainer.py


--------------------------------------------------------------------------------
/tests/e2e/check_results.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/check_results.py


--------------------------------------------------------------------------------
/tests/e2e/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/envs/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/envs/digit_completion/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/task.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/envs/digit_completion/task.py


--------------------------------------------------------------------------------
/tests/e2e/envs/digit_completion/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/envs/digit_completion/tokenizer.py


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_function_rm.sh


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_function_rm_no_rmpad.sh


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_model_rm.sh


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_no_rmpad.sh


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_seq_balance.sh


--------------------------------------------------------------------------------
/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_qwen_gsm8k_model_rm_ulysses.sh


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_ray_trainer.sh


--------------------------------------------------------------------------------
/tests/e2e/run_ray_trainer_rmpad.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/e2e/run_ray_trainer_rmpad.sh


--------------------------------------------------------------------------------
/tests/gpu_utility/test_memory_buffers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/gpu_utility/test_memory_buffers.py


--------------------------------------------------------------------------------
/tests/gpu_utility/test_ops.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/gpu_utility/test_ops.py


--------------------------------------------------------------------------------
/tests/gpu_utility/test_torch_functional.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/gpu_utility/test_torch_functional.py


--------------------------------------------------------------------------------
/tests/model/test_transformer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/model/test_transformer.py


--------------------------------------------------------------------------------
/tests/model/test_transformers_ulysses.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/model/test_transformers_ulysses.py


--------------------------------------------------------------------------------
/tests/ray/check_worker_alive/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/check_worker_alive/main.py


--------------------------------------------------------------------------------
/tests/ray/detached_worker/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/detached_worker/README.md


--------------------------------------------------------------------------------
/tests/ray/detached_worker/client.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/detached_worker/client.py


--------------------------------------------------------------------------------
/tests/ray/detached_worker/run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/detached_worker/run.sh


--------------------------------------------------------------------------------
/tests/ray/detached_worker/server.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/detached_worker/server.py


--------------------------------------------------------------------------------
/tests/ray/test_check_worker_alive.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_check_worker_alive.py


--------------------------------------------------------------------------------
/tests/ray/test_colocated_workers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_colocated_workers.py


--------------------------------------------------------------------------------
/tests/ray/test_data_transfer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_data_transfer.py


--------------------------------------------------------------------------------
/tests/ray/test_driverfunc_to_worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_driverfunc_to_worker.py


--------------------------------------------------------------------------------
/tests/ray/test_high_level_scheduling_api.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_high_level_scheduling_api.py


--------------------------------------------------------------------------------
/tests/ray/test_ray_local_envs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_ray_local_envs.py


--------------------------------------------------------------------------------
/tests/ray/test_rvdz.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_rvdz.py


--------------------------------------------------------------------------------
/tests/ray/test_worker_group_basics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_worker_group_basics.py


--------------------------------------------------------------------------------
/tests/ray/test_worker_group_torch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/ray/test_worker_group_torch.py


--------------------------------------------------------------------------------
/tests/rollout/run_fsdp_vllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/rollout/run_fsdp_vllm.py


--------------------------------------------------------------------------------
/tests/rollout/test_vllm_hf_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/rollout/test_vllm_hf_loader.py


--------------------------------------------------------------------------------
/tests/sanity/check_license.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/sanity/check_license.py


--------------------------------------------------------------------------------
/tests/sanity/test_import.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/sanity/test_import.py


--------------------------------------------------------------------------------
/tests/utility/test_tensor_dict_utilities.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/utility/test_tensor_dict_utilities.py


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rl_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/verl/utils/dataset/test_rl_dataset.py


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_rm_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/verl/utils/dataset/test_rm_dataset.py


--------------------------------------------------------------------------------
/tests/verl/utils/dataset/test_sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/tests/verl/utils/dataset/test_sft_dataset.py


--------------------------------------------------------------------------------
/verl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/__init__.py


--------------------------------------------------------------------------------
/verl/models/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/README.md


--------------------------------------------------------------------------------
/verl/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/__init__.py


--------------------------------------------------------------------------------
/verl/models/llama/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/__init__.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/__init__.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/checkpoint_utils/__init__.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/llama_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_loader.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/checkpoint_utils/llama_saver.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/checkpoint_utils/llama_saver.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/__init__.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_attention.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/parallel_attention.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_decoder.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/parallel_decoder.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_linear.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/parallel_linear.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_mlp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/parallel_mlp.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/layers/parallel_rmsnorm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/layers/parallel_rmsnorm.py


--------------------------------------------------------------------------------
/verl/models/llama/megatron/modeling_llama_megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/llama/megatron/modeling_llama_megatron.py


--------------------------------------------------------------------------------
/verl/models/registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/registry.py


--------------------------------------------------------------------------------
/verl/models/transformers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/transformers/__init__.py


--------------------------------------------------------------------------------
/verl/models/transformers/llama.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/transformers/llama.py


--------------------------------------------------------------------------------
/verl/models/transformers/monkey_patch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/transformers/monkey_patch.py


--------------------------------------------------------------------------------
/verl/models/transformers/qwen2.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/transformers/qwen2.py


--------------------------------------------------------------------------------
/verl/models/weight_loader_registry.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/models/weight_loader_registry.py


--------------------------------------------------------------------------------
/verl/protocol.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/protocol.py


--------------------------------------------------------------------------------
/verl/single_controller/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/__init__.py


--------------------------------------------------------------------------------
/verl/single_controller/base/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/__init__.py


--------------------------------------------------------------------------------
/verl/single_controller/base/decorator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/decorator.py


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/megatron/__init__.py


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/megatron/worker.py


--------------------------------------------------------------------------------
/verl/single_controller/base/megatron/worker_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/megatron/worker_group.py


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/register_center/__init__.py


--------------------------------------------------------------------------------
/verl/single_controller/base/register_center/ray.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/register_center/ray.py


--------------------------------------------------------------------------------
/verl/single_controller/base/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/worker.py


--------------------------------------------------------------------------------
/verl/single_controller/base/worker_group.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/base/worker_group.py


--------------------------------------------------------------------------------
/verl/single_controller/ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/ray/__init__.py


--------------------------------------------------------------------------------
/verl/single_controller/ray/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/ray/base.py


--------------------------------------------------------------------------------
/verl/single_controller/ray/megatron.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/single_controller/ray/megatron.py


--------------------------------------------------------------------------------
/verl/single_controller/version/version:
--------------------------------------------------------------------------------
1 | 0.0.2


--------------------------------------------------------------------------------
/verl/third_party/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/config.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/llm.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_3_1/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_3_1/worker.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/config.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/llm.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_4_2/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_4_2/worker.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/config.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/llm.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_5_4/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_5_4/worker.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/__init__.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/config.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/llm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/llm.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py


--------------------------------------------------------------------------------
/verl/third_party/vllm/vllm_v_0_6_3/worker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/third_party/vllm/vllm_v_0_6_3/worker.py


--------------------------------------------------------------------------------
/verl/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/__init__.py


--------------------------------------------------------------------------------
/verl/trainer/config/eval_with_generation.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/eval_with_generation.yaml


--------------------------------------------------------------------------------
/verl/trainer/config/evaluation.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/evaluation.yaml


--------------------------------------------------------------------------------
/verl/trainer/config/generation.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/generation.yaml


--------------------------------------------------------------------------------
/verl/trainer/config/ppo_megatron_trainer.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/ppo_megatron_trainer.yaml


--------------------------------------------------------------------------------
/verl/trainer/config/ppo_trainer.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/ppo_trainer.yaml


--------------------------------------------------------------------------------
/verl/trainer/config/sft_trainer.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/config/sft_trainer.yaml


--------------------------------------------------------------------------------
/verl/trainer/fsdp_sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/fsdp_sft_trainer.py


--------------------------------------------------------------------------------
/verl/trainer/main_eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/main_eval.py


--------------------------------------------------------------------------------
/verl/trainer/main_eval_with_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/main_eval_with_generation.py


--------------------------------------------------------------------------------
/verl/trainer/main_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/main_generation.py


--------------------------------------------------------------------------------
/verl/trainer/main_ppo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/main_ppo.py


--------------------------------------------------------------------------------
/verl/trainer/ppo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/ppo/__init__.py


--------------------------------------------------------------------------------
/verl/trainer/ppo/core_algos.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/ppo/core_algos.py


--------------------------------------------------------------------------------
/verl/trainer/ppo/ray_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/ppo/ray_trainer.py


--------------------------------------------------------------------------------
/verl/trainer/runtime_env.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/trainer/runtime_env.yaml


--------------------------------------------------------------------------------
/verl/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/__init__.py


--------------------------------------------------------------------------------
/verl/utils/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/config.py


--------------------------------------------------------------------------------
/verl/utils/dataset/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/dataset/README.md


--------------------------------------------------------------------------------
/verl/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/dataset/__init__.py


--------------------------------------------------------------------------------
/verl/utils/dataset/rl_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/dataset/rl_dataset.py


--------------------------------------------------------------------------------
/verl/utils/dataset/rm_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/dataset/rm_dataset.py


--------------------------------------------------------------------------------
/verl/utils/dataset/sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/dataset/sft_dataset.py


--------------------------------------------------------------------------------
/verl/utils/debug/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/debug/__init__.py


--------------------------------------------------------------------------------
/verl/utils/debug/performance.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/debug/performance.py


--------------------------------------------------------------------------------
/verl/utils/debug/trajectory_tracker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/debug/trajectory_tracker.py


--------------------------------------------------------------------------------
/verl/utils/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/distributed.py


--------------------------------------------------------------------------------
/verl/utils/flops_counter.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/flops_counter.py


--------------------------------------------------------------------------------
/verl/utils/fs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/fs.py


--------------------------------------------------------------------------------
/verl/utils/fsdp_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/fsdp_utils.py


--------------------------------------------------------------------------------
/verl/utils/hdfs_io.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/hdfs_io.py


--------------------------------------------------------------------------------
/verl/utils/import_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/import_utils.py


--------------------------------------------------------------------------------
/verl/utils/logger/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/logger/__init__.py


--------------------------------------------------------------------------------
/verl/utils/logger/aggregate_logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/logger/aggregate_logger.py


--------------------------------------------------------------------------------
/verl/utils/logging_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/logging_utils.py


--------------------------------------------------------------------------------
/verl/utils/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/__init__.py


--------------------------------------------------------------------------------
/verl/utils/megatron/memory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/memory.py


--------------------------------------------------------------------------------
/verl/utils/megatron/optimizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/optimizer.py


--------------------------------------------------------------------------------
/verl/utils/megatron/optimizer_config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/optimizer_config.py


--------------------------------------------------------------------------------
/verl/utils/megatron/pipeline_parallel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/pipeline_parallel.py


--------------------------------------------------------------------------------
/verl/utils/megatron/sequence_parallel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/sequence_parallel.py


--------------------------------------------------------------------------------
/verl/utils/megatron/tensor_parallel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron/tensor_parallel.py


--------------------------------------------------------------------------------
/verl/utils/megatron_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/megatron_utils.py


--------------------------------------------------------------------------------
/verl/utils/memory_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/memory_buffer.py


--------------------------------------------------------------------------------
/verl/utils/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/model.py


--------------------------------------------------------------------------------
/verl/utils/py_functional.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/py_functional.py


--------------------------------------------------------------------------------
/verl/utils/ray_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/ray_utils.py


--------------------------------------------------------------------------------
/verl/utils/rendezvous/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/rendezvous/__init__.py


--------------------------------------------------------------------------------
/verl/utils/rendezvous/ray_backend.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/rendezvous/ray_backend.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/__init__.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/countdown.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/countdown.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/globals.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/globals.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/__init__.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/math_reward.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/math_reward.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/math_reward_pl.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/math_reward_pl.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/math_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/math_utils/__init__.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/math_utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/math_utils/utils.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/rewards/reward_types.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/rewards/reward_types.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/system_prompts.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/system_prompts.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/deepscaler/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/deepscaler/utils.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/gsm8k.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/gsm8k.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/kk.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/kk.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/math.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/math.py


--------------------------------------------------------------------------------
/verl/utils/reward_score/multiply.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/reward_score/multiply.py


--------------------------------------------------------------------------------
/verl/utils/seqlen_balancing.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/seqlen_balancing.py


--------------------------------------------------------------------------------
/verl/utils/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/tokenizer.py


--------------------------------------------------------------------------------
/verl/utils/torch_dtypes.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/torch_dtypes.py


--------------------------------------------------------------------------------
/verl/utils/torch_functional.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/torch_functional.py


--------------------------------------------------------------------------------
/verl/utils/tracking.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/tracking.py


--------------------------------------------------------------------------------
/verl/utils/ulysses.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/utils/ulysses.py


--------------------------------------------------------------------------------
/verl/version/version:
--------------------------------------------------------------------------------
1 | 0.1


--------------------------------------------------------------------------------
/verl/workers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/__init__.py


--------------------------------------------------------------------------------
/verl/workers/actor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/actor/__init__.py


--------------------------------------------------------------------------------
/verl/workers/actor/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/actor/base.py


--------------------------------------------------------------------------------
/verl/workers/actor/dp_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/actor/dp_actor.py


--------------------------------------------------------------------------------
/verl/workers/actor/megatron_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/actor/megatron_actor.py


--------------------------------------------------------------------------------
/verl/workers/critic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/critic/__init__.py


--------------------------------------------------------------------------------
/verl/workers/critic/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/critic/base.py


--------------------------------------------------------------------------------
/verl/workers/critic/dp_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/critic/dp_critic.py


--------------------------------------------------------------------------------
/verl/workers/critic/megatron_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/critic/megatron_critic.py


--------------------------------------------------------------------------------
/verl/workers/fsdp_workers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/fsdp_workers.py


--------------------------------------------------------------------------------
/verl/workers/megatron_workers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/megatron_workers.py


--------------------------------------------------------------------------------
/verl/workers/reward_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/reward_model/__init__.py


--------------------------------------------------------------------------------
/verl/workers/reward_model/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/reward_model/base.py


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/reward_model/megatron/__init__.py


--------------------------------------------------------------------------------
/verl/workers/reward_model/megatron/reward_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/reward_model/megatron/reward_model.py


--------------------------------------------------------------------------------
/verl/workers/rollout/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/__init__.py


--------------------------------------------------------------------------------
/verl/workers/rollout/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/base.py


--------------------------------------------------------------------------------
/verl/workers/rollout/hf_rollout.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/hf_rollout.py


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/naive/__init__.py


--------------------------------------------------------------------------------
/verl/workers/rollout/naive/naive_rollout.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/naive/naive_rollout.py


--------------------------------------------------------------------------------
/verl/workers/rollout/tokenizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/tokenizer.py


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/vllm_rollout/__init__.py


--------------------------------------------------------------------------------
/verl/workers/rollout/vllm_rollout/vllm_rollout.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/rollout/vllm_rollout/vllm_rollout.py


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/sharding_manager/__init__.py


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/sharding_manager/base.py


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/fsdp_ulysses.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/sharding_manager/fsdp_ulysses.py


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/fsdp_vllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/sharding_manager/fsdp_vllm.py


--------------------------------------------------------------------------------
/verl/workers/sharding_manager/megatron_vllm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XueruiSu/Trust-Region-Preference-Approximation/HEAD/verl/workers/sharding_manager/megatron_vllm.py


--------------------------------------------------------------------------------