├── .gitignore ├── LICENSE ├── README.md ├── callbacks.py ├── configs ├── bo2_pythia410m_tldr.yml ├── deepspeed_zero2.yaml ├── evaluate_tldr.yml ├── generate_tldr.yml ├── onlinedpo_pythia1b_tldr.yml ├── onlinedpo_pythia1b_tldr_4gpu.yml ├── onlinedpo_pythia1b_tldr_vllm_1gpu.yml ├── onlinedpo_pythia1b_tldr_vllm_4gpu.yml ├── onlinedpo_pythia2.8b_tldr.yml ├── onlinedpo_pythia2.8b_tldr_4gpu.yml ├── onlinedpo_pythia2.8b_tldr_vllm_4gpu.yml ├── onlinedpo_pythia410m_tldr.yml ├── onlinedpo_pythia410m_tldr_4gpu.yml ├── onlinedpo_pythia410m_tldr_vllm.yml ├── onlinedpo_pythia410m_tldr_vllm_1gpu.yml ├── onlinedpo_pythia410m_tldr_vllm_4gpu.yml ├── ppo_pythia410m_tldr.yml ├── relabel_rm.yml ├── rloo_pythia410m_tldr.yml ├── rm_pythia1b_tldr.yml ├── rm_pythia2.8b_tldr.yml ├── rm_pythia410m_tldr.yml ├── sft_pythia1b_tldr.yml └── sft_pythia2.8b_tldr.yml ├── env.sh ├── generate_eval.sh ├── generate_for_eval.py ├── load_and_eval.py ├── online_dpo.py ├── ppov2.py ├── pyproject.toml ├── relabel_with_rm.py ├── requirements.txt ├── reward_modeling.py ├── rloo.py ├── sft.py ├── src ├── __init__.py ├── dpo_trainer.py ├── kl.py ├── online_bok_trainer.py ├── online_dpo_single_vllm_trainer.py ├── online_dpo_trainer.py ├── online_dpo_vllm_trainer.py ├── ppov2_trainer.py ├── rloo_trainer.py ├── rloo_trainer_vllm.py ├── utils.py └── vllm_utils.py ├── train.sh └── train_generate_eval.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/README.md -------------------------------------------------------------------------------- /callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/callbacks.py -------------------------------------------------------------------------------- /configs/bo2_pythia410m_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/bo2_pythia410m_tldr.yml -------------------------------------------------------------------------------- /configs/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /configs/evaluate_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/evaluate_tldr.yml -------------------------------------------------------------------------------- /configs/generate_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/generate_tldr.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia1b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia1b_tldr.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia1b_tldr_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia1b_tldr_4gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia1b_tldr_vllm_1gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia1b_tldr_vllm_1gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia1b_tldr_vllm_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia1b_tldr_vllm_4gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia2.8b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia2.8b_tldr.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia2.8b_tldr_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia2.8b_tldr_4gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia2.8b_tldr_vllm_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia2.8b_tldr_vllm_4gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia410m_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia410m_tldr.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia410m_tldr_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia410m_tldr_4gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia410m_tldr_vllm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia410m_tldr_vllm.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia410m_tldr_vllm_1gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia410m_tldr_vllm_1gpu.yml -------------------------------------------------------------------------------- /configs/onlinedpo_pythia410m_tldr_vllm_4gpu.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/onlinedpo_pythia410m_tldr_vllm_4gpu.yml -------------------------------------------------------------------------------- /configs/ppo_pythia410m_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/ppo_pythia410m_tldr.yml -------------------------------------------------------------------------------- /configs/relabel_rm.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/relabel_rm.yml -------------------------------------------------------------------------------- /configs/rloo_pythia410m_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/rloo_pythia410m_tldr.yml -------------------------------------------------------------------------------- /configs/rm_pythia1b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/rm_pythia1b_tldr.yml -------------------------------------------------------------------------------- /configs/rm_pythia2.8b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/rm_pythia2.8b_tldr.yml -------------------------------------------------------------------------------- /configs/rm_pythia410m_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/rm_pythia410m_tldr.yml -------------------------------------------------------------------------------- /configs/sft_pythia1b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/sft_pythia1b_tldr.yml -------------------------------------------------------------------------------- /configs/sft_pythia2.8b_tldr.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/configs/sft_pythia2.8b_tldr.yml -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/env.sh -------------------------------------------------------------------------------- /generate_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/generate_eval.sh -------------------------------------------------------------------------------- /generate_for_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/generate_for_eval.py -------------------------------------------------------------------------------- /load_and_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/load_and_eval.py -------------------------------------------------------------------------------- /online_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/online_dpo.py -------------------------------------------------------------------------------- /ppov2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/ppov2.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/pyproject.toml -------------------------------------------------------------------------------- /relabel_with_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/relabel_with_rm.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/requirements.txt -------------------------------------------------------------------------------- /reward_modeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/reward_modeling.py -------------------------------------------------------------------------------- /rloo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/rloo.py -------------------------------------------------------------------------------- /sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/sft.py -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | from . import perplexity 2 | -------------------------------------------------------------------------------- /src/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/dpo_trainer.py -------------------------------------------------------------------------------- /src/kl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/kl.py -------------------------------------------------------------------------------- /src/online_bok_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/online_bok_trainer.py -------------------------------------------------------------------------------- /src/online_dpo_single_vllm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/online_dpo_single_vllm_trainer.py -------------------------------------------------------------------------------- /src/online_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/online_dpo_trainer.py -------------------------------------------------------------------------------- /src/online_dpo_vllm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/online_dpo_vllm_trainer.py -------------------------------------------------------------------------------- /src/ppov2_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/ppov2_trainer.py -------------------------------------------------------------------------------- /src/rloo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/rloo_trainer.py -------------------------------------------------------------------------------- /src/rloo_trainer_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/rloo_trainer_vllm.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/utils.py -------------------------------------------------------------------------------- /src/vllm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/src/vllm_utils.py -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/train.sh -------------------------------------------------------------------------------- /train_generate_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mnoukhov/async_rlhf/HEAD/train_generate_eval.sh --------------------------------------------------------------------------------