├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── assets ├── noisyrollout_workflow.png ├── noisyrollout_workflow_caption.png └── teaser_comparison.png ├── eval ├── example_eval.sh ├── main.py └── utils │ ├── __init__.py │ ├── data_loaders.py │ ├── model_parser.py │ └── processing.py ├── pyproject.toml ├── requirements.txt ├── scripts └── model_merger.py ├── setup.py ├── training_scripts ├── README.md ├── config.yaml ├── qwen2_5_vl_7b_geo3k_grpo.sh ├── qwen2_5_vl_7b_geo3k_noisyrollout.sh ├── qwen2_5_vl_7b_k12_grpo.sh └── qwen2_5_vl_7b_k12_noisyrollout.sh └── verl ├── __init__.py ├── models ├── __init__.py ├── monkey_patch.py └── transformers │ ├── __init__.py │ ├── flash_attention_utils.py │ └── qwen2_vl.py ├── protocol.py ├── single_controller ├── __init__.py ├── base │ ├── __init__.py │ ├── decorator.py │ ├── register_center │ │ ├── __init__.py │ │ └── ray.py │ ├── worker.py │ └── worker_group.py └── ray │ ├── __init__.py │ └── base.py ├── trainer ├── __init__.py ├── config.py ├── core_algos.py ├── main.py ├── metrics.py └── ray_trainer.py ├── utils ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── checkpoint_manager.py │ └── fsdp_checkpoint_manager.py ├── dataset.py ├── flops_counter.py ├── fsdp_utils.py ├── image_aug.py ├── logger │ ├── __init__.py │ └── aggregate_logger.py ├── model_utils.py ├── py_functional.py ├── reward_score │ ├── __init__.py │ ├── math.py │ └── r1v.py ├── seqlen_balancing.py ├── tokenizer.py ├── torch_dtypes.py ├── torch_functional.py ├── tracking.py └── ulysses.py └── workers ├── __init__.py ├── actor ├── __init__.py ├── base.py ├── config.py └── dp_actor.py ├── config.py ├── critic ├── __init__.py ├── base.py ├── config.py └── dp_critic.py ├── fsdp_workers.py ├── reward ├── __init__.py ├── config.py └── custom.py ├── rollout ├── __init__.py ├── base.py ├── config.py └── vllm_rollout │ ├── __init__.py │ └── vllm_rollout_spmd.py └── sharding_manager ├── __init__.py ├── base.py ├── fsdp_ulysses.py └── fsdp_vllm.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/README.md -------------------------------------------------------------------------------- /assets/noisyrollout_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/assets/noisyrollout_workflow.png -------------------------------------------------------------------------------- /assets/noisyrollout_workflow_caption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/assets/noisyrollout_workflow_caption.png -------------------------------------------------------------------------------- /assets/teaser_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/assets/teaser_comparison.png -------------------------------------------------------------------------------- /eval/example_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/eval/example_eval.sh -------------------------------------------------------------------------------- /eval/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/eval/main.py -------------------------------------------------------------------------------- /eval/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/utils/data_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/eval/utils/data_loaders.py -------------------------------------------------------------------------------- /eval/utils/model_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/eval/utils/model_parser.py -------------------------------------------------------------------------------- /eval/utils/processing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/eval/utils/processing.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/model_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/scripts/model_merger.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/setup.py -------------------------------------------------------------------------------- /training_scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/README.md -------------------------------------------------------------------------------- /training_scripts/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/config.yaml -------------------------------------------------------------------------------- /training_scripts/qwen2_5_vl_7b_geo3k_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/qwen2_5_vl_7b_geo3k_grpo.sh -------------------------------------------------------------------------------- /training_scripts/qwen2_5_vl_7b_geo3k_noisyrollout.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/qwen2_5_vl_7b_geo3k_noisyrollout.sh -------------------------------------------------------------------------------- /training_scripts/qwen2_5_vl_7b_k12_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/qwen2_5_vl_7b_k12_grpo.sh -------------------------------------------------------------------------------- /training_scripts/qwen2_5_vl_7b_k12_noisyrollout.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/training_scripts/qwen2_5_vl_7b_k12_noisyrollout.sh -------------------------------------------------------------------------------- /verl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/__init__.py -------------------------------------------------------------------------------- /verl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/models/__init__.py -------------------------------------------------------------------------------- /verl/models/monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/models/monkey_patch.py -------------------------------------------------------------------------------- /verl/models/transformers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/models/transformers/__init__.py -------------------------------------------------------------------------------- /verl/models/transformers/flash_attention_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/models/transformers/flash_attention_utils.py -------------------------------------------------------------------------------- /verl/models/transformers/qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/models/transformers/qwen2_vl.py -------------------------------------------------------------------------------- /verl/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/protocol.py -------------------------------------------------------------------------------- /verl/single_controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/decorator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/decorator.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/register_center/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/base/register_center/ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/register_center/ray.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/worker.py -------------------------------------------------------------------------------- /verl/single_controller/base/worker_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/base/worker_group.py -------------------------------------------------------------------------------- /verl/single_controller/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/ray/__init__.py -------------------------------------------------------------------------------- /verl/single_controller/ray/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/single_controller/ray/base.py -------------------------------------------------------------------------------- /verl/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/__init__.py -------------------------------------------------------------------------------- /verl/trainer/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/config.py -------------------------------------------------------------------------------- /verl/trainer/core_algos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/core_algos.py -------------------------------------------------------------------------------- /verl/trainer/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/main.py -------------------------------------------------------------------------------- /verl/trainer/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/metrics.py -------------------------------------------------------------------------------- /verl/trainer/ray_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/trainer/ray_trainer.py -------------------------------------------------------------------------------- /verl/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/__init__.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/checkpoint/__init__.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/checkpoint/checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/checkpoint/fsdp_checkpoint_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/checkpoint/fsdp_checkpoint_manager.py -------------------------------------------------------------------------------- /verl/utils/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/dataset.py -------------------------------------------------------------------------------- /verl/utils/flops_counter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/flops_counter.py -------------------------------------------------------------------------------- /verl/utils/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/fsdp_utils.py -------------------------------------------------------------------------------- /verl/utils/image_aug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/image_aug.py -------------------------------------------------------------------------------- /verl/utils/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/logger/__init__.py -------------------------------------------------------------------------------- /verl/utils/logger/aggregate_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/logger/aggregate_logger.py -------------------------------------------------------------------------------- /verl/utils/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/model_utils.py -------------------------------------------------------------------------------- /verl/utils/py_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/py_functional.py -------------------------------------------------------------------------------- /verl/utils/reward_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/reward_score/__init__.py -------------------------------------------------------------------------------- /verl/utils/reward_score/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/reward_score/math.py -------------------------------------------------------------------------------- /verl/utils/reward_score/r1v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/reward_score/r1v.py -------------------------------------------------------------------------------- /verl/utils/seqlen_balancing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/seqlen_balancing.py -------------------------------------------------------------------------------- /verl/utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/tokenizer.py -------------------------------------------------------------------------------- /verl/utils/torch_dtypes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/torch_dtypes.py -------------------------------------------------------------------------------- /verl/utils/torch_functional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/torch_functional.py -------------------------------------------------------------------------------- /verl/utils/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/tracking.py -------------------------------------------------------------------------------- /verl/utils/ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/utils/ulysses.py -------------------------------------------------------------------------------- /verl/workers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/actor/__init__.py -------------------------------------------------------------------------------- /verl/workers/actor/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/actor/base.py -------------------------------------------------------------------------------- /verl/workers/actor/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/actor/config.py -------------------------------------------------------------------------------- /verl/workers/actor/dp_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/actor/dp_actor.py -------------------------------------------------------------------------------- /verl/workers/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/config.py -------------------------------------------------------------------------------- /verl/workers/critic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/critic/__init__.py -------------------------------------------------------------------------------- /verl/workers/critic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/critic/base.py -------------------------------------------------------------------------------- /verl/workers/critic/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/critic/config.py -------------------------------------------------------------------------------- /verl/workers/critic/dp_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/critic/dp_critic.py -------------------------------------------------------------------------------- /verl/workers/fsdp_workers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/fsdp_workers.py -------------------------------------------------------------------------------- /verl/workers/reward/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/reward/__init__.py -------------------------------------------------------------------------------- /verl/workers/reward/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/reward/config.py -------------------------------------------------------------------------------- /verl/workers/reward/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/reward/custom.py -------------------------------------------------------------------------------- /verl/workers/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/rollout/base.py -------------------------------------------------------------------------------- /verl/workers/rollout/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/rollout/config.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/rollout/vllm_rollout/__init__.py -------------------------------------------------------------------------------- /verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/sharding_manager/__init__.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/sharding_manager/base.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_ulysses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/sharding_manager/fsdp_ulysses.py -------------------------------------------------------------------------------- /verl/workers/sharding_manager/fsdp_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NUS-TRAIL/NoisyRollout/HEAD/verl/workers/sharding_manager/fsdp_vllm.py --------------------------------------------------------------------------------