├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── logo.png ├── stable.png └── teaser.png ├── inference ├── MM-RLHF-Reward │ ├── get_acc.py │ ├── mm_reward_bench.jsonl │ └── r1_reward.py ├── Multimodal-Reward │ ├── 0_download_data.py │ ├── 1_get_accuracy.py │ ├── get_IXC_reward.py │ ├── get_mm-rlhf_reward.py │ └── get_r1_reward.py └── VL-Reward-Bench │ ├── data.jsonl │ ├── get_acc.py │ └── r1_reward.py ├── openrlhf ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── cli │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── train_ppo.cpython-310.pyc │ │ └── train_ppo_ray.cpython-310.pyc │ ├── batch_inference.py │ ├── interactive_chat.py │ ├── lora_combiner.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── process_reward_dataset.cpython-310.pyc │ │ ├── prompts_dataset.cpython-310.pyc │ │ ├── reward_dataset.cpython-310.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── loss.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── ring_attn_utils.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── remote_rm │ │ ├── __pycache__ │ │ │ ├── math_equal.cpython-310.pyc │ │ │ ├── math_normalize.cpython-310.pyc │ │ │ ├── math_verifier.cpython-310.pyc │ │ │ ├── math_verifier_mllm.cpython-310.pyc │ │ │ ├── math_verifier_mllm_all1.cpython-310.pyc │ │ │ ├── math_verifier_mllm_all1_2.cpython-310.pyc │ │ │ ├── math_verifier_mllm_longcot.cpython-310.pyc │ │ │ ├── math_verifier_mllm_reward.cpython-310.pyc │ │ │ └── strict_math_verify.cpython-310.pyc │ │ ├── math_equal.py │ │ ├── math_normalize.py │ │ ├── math_verifier.py │ │ ├── math_verifier_mllm.py │ │ └── strict_math_verify.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── dpo_trainer.cpython-310.pyc │ │ ├── kd_trainer.cpython-310.pyc │ │ ├── kto_trainer.cpython-310.pyc │ │ ├── ppo_trainer.cpython-310.pyc │ │ ├── prm_trainer.cpython-310.pyc │ │ ├── rm_trainer.cpython-310.pyc │ │ └── sft_trainer.cpython-310.pyc │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── data_processor.cpython-310.pyc │ │ │ ├── experience_maker.cpython-310.pyc │ │ │ ├── kl_controller.cpython-310.pyc │ │ │ └── replay_buffer.cpython-310.pyc │ │ ├── data_processor.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── launcher.cpython-310.pyc │ │ │ ├── ppo_actor.cpython-310.pyc │ │ │ ├── ppo_critic.cpython-310.pyc │ │ │ ├── utils.cpython-310.pyc │ │ │ ├── vllm_engine.cpython-310.pyc │ │ │ └── vllm_worker_wrap.cpython-310.pyc │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── distributed_sampler.cpython-310.pyc │ ├── distributed_util.cpython-310.pyc │ ├── logging_utils.cpython-310.pyc │ ├── processor.cpython-310.pyc │ ├── remote_rm_utils.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── deepspeed │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── deepspeed.cpython-310.pyc │ │ └── deepspeed_utils.cpython-310.pyc │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── scripts └── train.sh ├── setup.py └── version.txt /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/README.md -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/stable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/docs/stable.png -------------------------------------------------------------------------------- /docs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/docs/teaser.png -------------------------------------------------------------------------------- /inference/MM-RLHF-Reward/get_acc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/MM-RLHF-Reward/get_acc.py -------------------------------------------------------------------------------- /inference/MM-RLHF-Reward/mm_reward_bench.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/MM-RLHF-Reward/mm_reward_bench.jsonl -------------------------------------------------------------------------------- /inference/MM-RLHF-Reward/r1_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/MM-RLHF-Reward/r1_reward.py -------------------------------------------------------------------------------- /inference/Multimodal-Reward/0_download_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/Multimodal-Reward/0_download_data.py -------------------------------------------------------------------------------- /inference/Multimodal-Reward/1_get_accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/Multimodal-Reward/1_get_accuracy.py -------------------------------------------------------------------------------- /inference/Multimodal-Reward/get_IXC_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/Multimodal-Reward/get_IXC_reward.py -------------------------------------------------------------------------------- /inference/Multimodal-Reward/get_mm-rlhf_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/Multimodal-Reward/get_mm-rlhf_reward.py -------------------------------------------------------------------------------- /inference/Multimodal-Reward/get_r1_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/Multimodal-Reward/get_r1_reward.py -------------------------------------------------------------------------------- /inference/VL-Reward-Bench/data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/VL-Reward-Bench/data.jsonl -------------------------------------------------------------------------------- /inference/VL-Reward-Bench/get_acc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/VL-Reward-Bench/get_acc.py -------------------------------------------------------------------------------- /inference/VL-Reward-Bench/r1_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/inference/VL-Reward-Bench/r1_reward.py -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/train_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/__pycache__/train_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /openrlhf/cli/lora_combiner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/lora_combiner.py -------------------------------------------------------------------------------- /openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_equal.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_equal.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_normalize.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_normalize.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier_mllm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier_mllm.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_all1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_all1.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_all1_2.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_all1_2.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_longcot.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_longcot.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/math_verifier_mllm_reward.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/__pycache__/strict_math_verify.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/__pycache__/strict_math_verify.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/math_equal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/math_equal.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/math_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/math_normalize.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/math_verifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/math_verifier.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/math_verifier_mllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/math_verifier_mllm.py -------------------------------------------------------------------------------- /openrlhf/models/remote_rm/strict_math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/remote_rm/strict_math_verify.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/prm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/prm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/data_processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__pycache__/data_processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/data_processor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/remote_rm_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/remote_rm_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/scripts/train.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yfzhang114/r1_reward/HEAD/setup.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.6.0 --------------------------------------------------------------------------------