├── .gitignore ├── LICENSE ├── README.md ├── assets ├── overview-2.jpg └── overview-2.pdf ├── installation.md ├── openrlhf ├── __init__.py ├── cli │ ├── __init__.py │ ├── eval_ray.py │ └── train_ppo_ray.py ├── datasets │ ├── __init__.py │ ├── prompts_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── evaluator.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── data_processor.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ └── ray │ │ ├── __init__.py │ │ ├── evaluator2.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py └── utils │ ├── __init__.py │ ├── deepspeed │ ├── __init__.py │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── eval_7b.sh ├── eval_vlm_new.sh └── train_vlm_multi.sh └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/README.md -------------------------------------------------------------------------------- /assets/overview-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/assets/overview-2.jpg -------------------------------------------------------------------------------- /assets/overview-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/assets/overview-2.pdf -------------------------------------------------------------------------------- /installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/installation.md -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/eval_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/cli/eval_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/evaluator.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/data_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_utils/data_processor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/evaluator2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/evaluator2.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/eval_7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/scripts/eval_7b.sh -------------------------------------------------------------------------------- /scripts/eval_vlm_new.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/scripts/eval_vlm_new.sh -------------------------------------------------------------------------------- /scripts/train_vlm_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/scripts/train_vlm_multi.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TIGER-AI-Lab/VL-Rethinker/HEAD/setup.py --------------------------------------------------------------------------------