├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── dockerfile ├── Dockerfile └── docker-entrypoint.sh ├── docs ├── logo.png ├── openrlhf_doc.md ├── openrlhf_doc_cn.md ├── openrlhf_doc_pr.md ├── ppo_examples.md └── ray_architecture.png ├── evaluation ├── ceval │ ├── ceval.py │ ├── ceval_data │ │ └── readme.md │ ├── evaluator.py │ ├── llama_evaluator.py │ ├── run_ceval.sh │ └── subject_mapping.json ├── cmmlu │ ├── __init__.py │ ├── categories.py │ ├── cmmlu_data │ │ └── readme.md │ ├── eval.py │ ├── evaluator.py │ ├── llama2_evaluator.py │ └── run_cmmlu.sh └── gpt4 │ ├── README.md │ └── benchmark.jsonl ├── examples ├── .ipynb_checkpoints │ ├── batch_inference-checkpoint.py │ └── train_ppo-checkpoint.py ├── batch_inference.py ├── interactive_chat.py ├── scripts │ ├── .ipynb_checkpoints │ │ ├── HumanEval-instruction-llama-checkpoint.jsonl │ │ ├── ana-checkpoint.py │ │ ├── build_openrlhf-checkpoint.sh │ │ ├── infer-checkpoint.sh │ │ ├── infer_rm-checkpoint.sh │ │ ├── preference_pair_new-checkpoint.jsonl │ │ ├── train_ppo_llama-checkpoint.sh │ │ ├── train_rm_llama-checkpoint.sh │ │ └── train_sft_llama-checkpoint.sh │ ├── ana.py │ ├── build_openrlhf.sh │ ├── change_LF_rm.py │ ├── change_ans.py │ ├── change_format.py │ ├── change_rm.py │ ├── check.py │ ├── check_tokens.py │ ├── combine_train_ana.sh │ ├── convert.py │ ├── docker_run.sh │ ├── dup.sh │ ├── infer.sh │ ├── infer_rm.sh │ ├── interactive_chat_llama.sh │ ├── nvidia_docker_install.sh │ ├── obtain.py │ ├── remove_temp.py │ ├── reward_ana.py │ ├── sel.py │ ├── train_conditional_llama.sh │ ├── train_continue_pretrain_llama.sh │ ├── train_dpo_llama.sh │ ├── train_dpo_llama_34b.sh │ ├── train_knowledge_distillation.sh │ ├── train_kto_llama.sh │ ├── train_llama_slurm.sh │ ├── train_ppo_llama.sh │ ├── train_ppo_llama_ray.sh │ ├── train_ppo_llama_ray_70b.sh │ ├── train_ppo_llama_ray_slurm.sh │ ├── train_rejection_sampling_llama.sh │ ├── train_rm_llama.sh │ ├── train_sft_jamba_lora.sh │ ├── train_sft_llama.sh │ └── train_sft_mixtral_lora.sh ├── train_dpo.py ├── train_kd.py ├── train_kto.py ├── train_ppo.py ├── train_ppo_ray.py ├── train_rm.py └── train_sft.py ├── openrlhf.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt ├── openrlhf ├── .DS_Store ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── prompts_dataset.cpython-310.pyc │ │ ├── reward_dataset.cpython-310.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── loss.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── actor.py │ ├── loss.py │ ├── model.py │ └── utils.py ├── trainer │ ├── .DS_Store │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── dpo_trainer.cpython-310.pyc │ │ ├── kd_trainer.cpython-310.pyc │ │ ├── kto_trainer.cpython-310.pyc │ │ ├── ppo_trainer.cpython-310.pyc │ │ ├── rm_trainer.cpython-310.pyc │ │ └── sft_trainer.cpython-310.pyc │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── experience_maker.cpython-310.pyc │ │ │ ├── kl_controller.cpython-310.pyc │ │ │ └── replay_buffer.cpython-310.pyc │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── ray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── launcher.cpython-310.pyc │ │ │ ├── ppo_actor.cpython-310.pyc │ │ │ ├── ppo_critic.cpython-310.pyc │ │ │ ├── vllm_engine.cpython-310.pyc │ │ │ └── vllm_worker_wrap.cpython-310.pyc │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── deepspeed.cpython-310.pyc │ ├── deepspeed_utils.cpython-310.pyc │ ├── distributed_util.cpython-310.pyc │ ├── logging.cpython-310.pyc │ ├── processor.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── deepspeed.py │ ├── deepspeed_utils.py │ ├── distributed_util.py │ ├── logging.py │ ├── processor.py │ └── utils.py ├── openrlhf_filter ├── .DS_Store ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── prompts_dataset.cpython-310.pyc │ │ ├── reward_dataset.cpython-310.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── loss.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── actor.py │ ├── loss.py │ ├── model.py │ └── utils.py ├── trainer │ ├── .DS_Store │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── dpo_trainer.cpython-310.pyc │ │ ├── kd_trainer.cpython-310.pyc │ │ ├── kto_trainer.cpython-310.pyc │ │ ├── ppo_trainer.cpython-310.pyc │ │ ├── rm_trainer.cpython-310.pyc │ │ └── sft_trainer.cpython-310.pyc │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── experience_maker.cpython-310.pyc │ │ │ ├── kl_controller.cpython-310.pyc │ │ │ └── replay_buffer.cpython-310.pyc │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── ray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── launcher.cpython-310.pyc │ │ │ ├── ppo_actor.cpython-310.pyc │ │ │ ├── ppo_critic.cpython-310.pyc │ │ │ ├── vllm_engine.cpython-310.pyc │ │ │ └── vllm_worker_wrap.cpython-310.pyc │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── deepspeed.cpython-310.pyc │ ├── deepspeed_utils.cpython-310.pyc │ ├── distributed_util.cpython-310.pyc │ ├── logging.cpython-310.pyc │ ├── processor.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── deepspeed.py │ ├── deepspeed_utils.py │ ├── distributed_util.py │ ├── logging.py │ ├── processor.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py └── version.txt /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/README.md -------------------------------------------------------------------------------- /dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/dockerfile/Dockerfile -------------------------------------------------------------------------------- /dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/openrlhf_doc.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc.md -------------------------------------------------------------------------------- /docs/openrlhf_doc_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc_cn.md -------------------------------------------------------------------------------- /docs/openrlhf_doc_pr.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc_pr.md -------------------------------------------------------------------------------- /docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/ppo_examples.md -------------------------------------------------------------------------------- /docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/ray_architecture.png -------------------------------------------------------------------------------- /evaluation/ceval/ceval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/ceval.py -------------------------------------------------------------------------------- /evaluation/ceval/ceval_data/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/ceval_data/readme.md -------------------------------------------------------------------------------- /evaluation/ceval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/evaluator.py -------------------------------------------------------------------------------- /evaluation/ceval/llama_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/llama_evaluator.py -------------------------------------------------------------------------------- /evaluation/ceval/run_ceval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/run_ceval.sh -------------------------------------------------------------------------------- /evaluation/ceval/subject_mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/subject_mapping.json -------------------------------------------------------------------------------- /evaluation/cmmlu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/cmmlu/categories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/categories.py -------------------------------------------------------------------------------- /evaluation/cmmlu/cmmlu_data/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/cmmlu_data/readme.md -------------------------------------------------------------------------------- /evaluation/cmmlu/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/eval.py -------------------------------------------------------------------------------- /evaluation/cmmlu/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/evaluator.py -------------------------------------------------------------------------------- /evaluation/cmmlu/llama2_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/llama2_evaluator.py -------------------------------------------------------------------------------- /evaluation/cmmlu/run_cmmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/run_cmmlu.sh -------------------------------------------------------------------------------- /evaluation/gpt4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/gpt4/README.md -------------------------------------------------------------------------------- /evaluation/gpt4/benchmark.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/gpt4/benchmark.jsonl -------------------------------------------------------------------------------- /examples/.ipynb_checkpoints/batch_inference-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/.ipynb_checkpoints/batch_inference-checkpoint.py -------------------------------------------------------------------------------- /examples/.ipynb_checkpoints/train_ppo-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/.ipynb_checkpoints/train_ppo-checkpoint.py -------------------------------------------------------------------------------- /examples/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/batch_inference.py -------------------------------------------------------------------------------- /examples/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/interactive_chat.py -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/HumanEval-instruction-llama-checkpoint.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/HumanEval-instruction-llama-checkpoint.jsonl -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/ana-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/ana-checkpoint.py -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/build_openrlhf-checkpoint.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | pip install --user ../../ -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/infer-checkpoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/infer-checkpoint.sh -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/infer_rm-checkpoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/infer_rm-checkpoint.sh -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/preference_pair_new-checkpoint.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/preference_pair_new-checkpoint.jsonl -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/train_ppo_llama-checkpoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_ppo_llama-checkpoint.sh -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/train_rm_llama-checkpoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_rm_llama-checkpoint.sh -------------------------------------------------------------------------------- /examples/scripts/.ipynb_checkpoints/train_sft_llama-checkpoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_sft_llama-checkpoint.sh -------------------------------------------------------------------------------- /examples/scripts/ana.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/ana.py -------------------------------------------------------------------------------- /examples/scripts/build_openrlhf.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | pip install --user ../../ -------------------------------------------------------------------------------- /examples/scripts/change_LF_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_LF_rm.py -------------------------------------------------------------------------------- /examples/scripts/change_ans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_ans.py -------------------------------------------------------------------------------- /examples/scripts/change_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_format.py -------------------------------------------------------------------------------- /examples/scripts/change_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_rm.py -------------------------------------------------------------------------------- /examples/scripts/check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/check.py -------------------------------------------------------------------------------- /examples/scripts/check_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/check_tokens.py -------------------------------------------------------------------------------- /examples/scripts/combine_train_ana.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/combine_train_ana.sh -------------------------------------------------------------------------------- /examples/scripts/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/convert.py -------------------------------------------------------------------------------- /examples/scripts/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/docker_run.sh -------------------------------------------------------------------------------- /examples/scripts/dup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/dup.sh -------------------------------------------------------------------------------- /examples/scripts/infer.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/infer.sh -------------------------------------------------------------------------------- /examples/scripts/infer_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/infer_rm.sh -------------------------------------------------------------------------------- /examples/scripts/interactive_chat_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/interactive_chat_llama.sh -------------------------------------------------------------------------------- /examples/scripts/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/nvidia_docker_install.sh -------------------------------------------------------------------------------- /examples/scripts/obtain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/obtain.py -------------------------------------------------------------------------------- /examples/scripts/remove_temp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/remove_temp.py -------------------------------------------------------------------------------- /examples/scripts/reward_ana.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/reward_ana.py -------------------------------------------------------------------------------- /examples/scripts/sel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/sel.py -------------------------------------------------------------------------------- /examples/scripts/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_conditional_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_continue_pretrain_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_continue_pretrain_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama_34b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_dpo_llama_34b.sh -------------------------------------------------------------------------------- /examples/scripts/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /examples/scripts/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_kto_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_llama_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_rm_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_jamba_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_jamba_lora.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /examples/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_dpo.py -------------------------------------------------------------------------------- /examples/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_kd.py -------------------------------------------------------------------------------- /examples/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_kto.py -------------------------------------------------------------------------------- /examples/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_ppo.py -------------------------------------------------------------------------------- /examples/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_ppo_ray.py -------------------------------------------------------------------------------- /examples/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_rm.py -------------------------------------------------------------------------------- /examples/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_sft.py -------------------------------------------------------------------------------- /openrlhf.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/PKG-INFO -------------------------------------------------------------------------------- /openrlhf.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /openrlhf.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /openrlhf.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/requires.txt -------------------------------------------------------------------------------- /openrlhf.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | openrlhf 2 | -------------------------------------------------------------------------------- /openrlhf/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/.DS_Store -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/.DS_Store -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/deepspeed_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/deepspeed_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/logging.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/logging.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/logging.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /openrlhf_filter/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/.DS_Store -------------------------------------------------------------------------------- /openrlhf_filter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf_filter/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf_filter/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf_filter/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf_filter/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf_filter/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf_filter/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/actor.py -------------------------------------------------------------------------------- /openrlhf_filter/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/loss.py -------------------------------------------------------------------------------- /openrlhf_filter/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/model.py -------------------------------------------------------------------------------- /openrlhf_filter/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/utils.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/.DS_Store -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/dpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/dpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/kd_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/kd_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/kto_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/kto_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/ppo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/ppo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/rm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/rm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/__pycache__/sft_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/sft_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/deepspeed_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/deepspeed_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/distributed_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/distributed_util.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/logging.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/logging.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf_filter/utils/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/deepspeed.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/logging.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/processor.py -------------------------------------------------------------------------------- /openrlhf_filter/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/setup.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.2.8 --------------------------------------------------------------------------------