├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── dockerfile
    ├── Dockerfile
    └── docker-entrypoint.sh
├── docs
    ├── logo.png
    ├── openrlhf_doc.md
    ├── openrlhf_doc_cn.md
    ├── openrlhf_doc_pr.md
    ├── ppo_examples.md
    └── ray_architecture.png
├── evaluation
    ├── ceval
    │   ├── ceval.py
    │   ├── ceval_data
    │   │   └── readme.md
    │   ├── evaluator.py
    │   ├── llama_evaluator.py
    │   ├── run_ceval.sh
    │   └── subject_mapping.json
    ├── cmmlu
    │   ├── __init__.py
    │   ├── categories.py
    │   ├── cmmlu_data
    │   │   └── readme.md
    │   ├── eval.py
    │   ├── evaluator.py
    │   ├── llama2_evaluator.py
    │   └── run_cmmlu.sh
    └── gpt4
    │   ├── README.md
    │   └── benchmark.jsonl
├── examples
    ├── .ipynb_checkpoints
    │   ├── batch_inference-checkpoint.py
    │   └── train_ppo-checkpoint.py
    ├── batch_inference.py
    ├── interactive_chat.py
    ├── scripts
    │   ├── .ipynb_checkpoints
    │   │   ├── HumanEval-instruction-llama-checkpoint.jsonl
    │   │   ├── ana-checkpoint.py
    │   │   ├── build_openrlhf-checkpoint.sh
    │   │   ├── infer-checkpoint.sh
    │   │   ├── infer_rm-checkpoint.sh
    │   │   ├── preference_pair_new-checkpoint.jsonl
    │   │   ├── train_ppo_llama-checkpoint.sh
    │   │   ├── train_rm_llama-checkpoint.sh
    │   │   └── train_sft_llama-checkpoint.sh
    │   ├── ana.py
    │   ├── build_openrlhf.sh
    │   ├── change_LF_rm.py
    │   ├── change_ans.py
    │   ├── change_format.py
    │   ├── change_rm.py
    │   ├── check.py
    │   ├── check_tokens.py
    │   ├── combine_train_ana.sh
    │   ├── convert.py
    │   ├── docker_run.sh
    │   ├── dup.sh
    │   ├── infer.sh
    │   ├── infer_rm.sh
    │   ├── interactive_chat_llama.sh
    │   ├── nvidia_docker_install.sh
    │   ├── obtain.py
    │   ├── remove_temp.py
    │   ├── reward_ana.py
    │   ├── sel.py
    │   ├── train_conditional_llama.sh
    │   ├── train_continue_pretrain_llama.sh
    │   ├── train_dpo_llama.sh
    │   ├── train_dpo_llama_34b.sh
    │   ├── train_knowledge_distillation.sh
    │   ├── train_kto_llama.sh
    │   ├── train_llama_slurm.sh
    │   ├── train_ppo_llama.sh
    │   ├── train_ppo_llama_ray.sh
    │   ├── train_ppo_llama_ray_70b.sh
    │   ├── train_ppo_llama_ray_slurm.sh
    │   ├── train_rejection_sampling_llama.sh
    │   ├── train_rm_llama.sh
    │   ├── train_sft_jamba_lora.sh
    │   ├── train_sft_llama.sh
    │   └── train_sft_mixtral_lora.sh
    ├── train_dpo.py
    ├── train_kd.py
    ├── train_kto.py
    ├── train_ppo.py
    ├── train_ppo_ray.py
    ├── train_rm.py
    └── train_sft.py
├── openrlhf.egg-info
    ├── PKG-INFO
    ├── SOURCES.txt
    ├── dependency_links.txt
    ├── requires.txt
    └── top_level.txt
├── openrlhf
    ├── .DS_Store
    ├── __init__.py
    ├── __pycache__
    │   └── __init__.cpython-310.pyc
    ├── datasets
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── prompts_dataset.cpython-310.pyc
    │   │   ├── reward_dataset.cpython-310.pyc
    │   │   ├── sft_dataset.cpython-310.pyc
    │   │   ├── unpaired_preference_dataset.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── prompts_dataset.py
    │   ├── reward_dataset.py
    │   ├── sft_dataset.py
    │   ├── unpaired_preference_dataset.py
    │   └── utils.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── actor.cpython-310.pyc
    │   │   ├── loss.cpython-310.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── actor.py
    │   ├── loss.py
    │   ├── model.py
    │   └── utils.py
    ├── trainer
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── dpo_trainer.cpython-310.pyc
    │   │   ├── kd_trainer.cpython-310.pyc
    │   │   ├── kto_trainer.cpython-310.pyc
    │   │   ├── ppo_trainer.cpython-310.pyc
    │   │   ├── rm_trainer.cpython-310.pyc
    │   │   └── sft_trainer.cpython-310.pyc
    │   ├── dpo_trainer.py
    │   ├── kd_trainer.py
    │   ├── kto_trainer.py
    │   ├── ppo_trainer.py
    │   ├── ppo_utils
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── experience_maker.cpython-310.pyc
    │   │   │   ├── kl_controller.cpython-310.pyc
    │   │   │   └── replay_buffer.cpython-310.pyc
    │   │   ├── experience_maker.py
    │   │   ├── kl_controller.py
    │   │   └── replay_buffer.py
    │   ├── ray
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── launcher.cpython-310.pyc
    │   │   │   ├── ppo_actor.cpython-310.pyc
    │   │   │   ├── ppo_critic.cpython-310.pyc
    │   │   │   ├── vllm_engine.cpython-310.pyc
    │   │   │   └── vllm_worker_wrap.cpython-310.pyc
    │   │   ├── launcher.py
    │   │   ├── ppo_actor.py
    │   │   ├── ppo_critic.py
    │   │   ├── vllm_engine.py
    │   │   └── vllm_worker_wrap.py
    │   ├── rm_trainer.py
    │   └── sft_trainer.py
    └── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-310.pyc
    │       ├── deepspeed.cpython-310.pyc
    │       ├── deepspeed_utils.cpython-310.pyc
    │       ├── distributed_util.cpython-310.pyc
    │       ├── logging.cpython-310.pyc
    │       ├── processor.cpython-310.pyc
    │       └── utils.cpython-310.pyc
    │   ├── deepspeed.py
    │   ├── deepspeed_utils.py
    │   ├── distributed_util.py
    │   ├── logging.py
    │   ├── processor.py
    │   └── utils.py
├── openrlhf_filter
    ├── .DS_Store
    ├── __init__.py
    ├── __pycache__
    │   └── __init__.cpython-310.pyc
    ├── datasets
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── prompts_dataset.cpython-310.pyc
    │   │   ├── reward_dataset.cpython-310.pyc
    │   │   ├── sft_dataset.cpython-310.pyc
    │   │   ├── unpaired_preference_dataset.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── prompts_dataset.py
    │   ├── reward_dataset.py
    │   ├── sft_dataset.py
    │   ├── unpaired_preference_dataset.py
    │   └── utils.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── actor.cpython-310.pyc
    │   │   ├── loss.cpython-310.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── actor.py
    │   ├── loss.py
    │   ├── model.py
    │   └── utils.py
    ├── trainer
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── dpo_trainer.cpython-310.pyc
    │   │   ├── kd_trainer.cpython-310.pyc
    │   │   ├── kto_trainer.cpython-310.pyc
    │   │   ├── ppo_trainer.cpython-310.pyc
    │   │   ├── rm_trainer.cpython-310.pyc
    │   │   └── sft_trainer.cpython-310.pyc
    │   ├── dpo_trainer.py
    │   ├── kd_trainer.py
    │   ├── kto_trainer.py
    │   ├── ppo_trainer.py
    │   ├── ppo_utils
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── experience_maker.cpython-310.pyc
    │   │   │   ├── kl_controller.cpython-310.pyc
    │   │   │   └── replay_buffer.cpython-310.pyc
    │   │   ├── experience_maker.py
    │   │   ├── kl_controller.py
    │   │   └── replay_buffer.py
    │   ├── ray
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── launcher.cpython-310.pyc
    │   │   │   ├── ppo_actor.cpython-310.pyc
    │   │   │   ├── ppo_critic.cpython-310.pyc
    │   │   │   ├── vllm_engine.cpython-310.pyc
    │   │   │   └── vllm_worker_wrap.cpython-310.pyc
    │   │   ├── launcher.py
    │   │   ├── ppo_actor.py
    │   │   ├── ppo_critic.py
    │   │   ├── vllm_engine.py
    │   │   └── vllm_worker_wrap.py
    │   ├── rm_trainer.py
    │   └── sft_trainer.py
    └── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-310.pyc
    │       ├── deepspeed.cpython-310.pyc
    │       ├── deepspeed_utils.cpython-310.pyc
    │       ├── distributed_util.cpython-310.pyc
    │       ├── logging.cpython-310.pyc
    │       ├── processor.cpython-310.pyc
    │       └── utils.cpython-310.pyc
    │   ├── deepspeed.py
    │   ├── deepspeed_utils.py
    │   ├── distributed_util.py
    │   ├── logging.py
    │   ├── processor.py
    │   └── utils.py
├── pyproject.toml
├── requirements.txt
├── setup.py
└── version.txt


/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/CONTRIBUTING.md


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/README.md


--------------------------------------------------------------------------------
/dockerfile/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/dockerfile/Dockerfile


--------------------------------------------------------------------------------
/dockerfile/docker-entrypoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/dockerfile/docker-entrypoint.sh


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/logo.png


--------------------------------------------------------------------------------
/docs/openrlhf_doc.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc.md


--------------------------------------------------------------------------------
/docs/openrlhf_doc_cn.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc_cn.md


--------------------------------------------------------------------------------
/docs/openrlhf_doc_pr.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/openrlhf_doc_pr.md


--------------------------------------------------------------------------------
/docs/ppo_examples.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/ppo_examples.md


--------------------------------------------------------------------------------
/docs/ray_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/docs/ray_architecture.png


--------------------------------------------------------------------------------
/evaluation/ceval/ceval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/ceval.py


--------------------------------------------------------------------------------
/evaluation/ceval/ceval_data/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/ceval_data/readme.md


--------------------------------------------------------------------------------
/evaluation/ceval/evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/evaluator.py


--------------------------------------------------------------------------------
/evaluation/ceval/llama_evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/llama_evaluator.py


--------------------------------------------------------------------------------
/evaluation/ceval/run_ceval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/run_ceval.sh


--------------------------------------------------------------------------------
/evaluation/ceval/subject_mapping.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/ceval/subject_mapping.json


--------------------------------------------------------------------------------
/evaluation/cmmlu/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/cmmlu/categories.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/categories.py


--------------------------------------------------------------------------------
/evaluation/cmmlu/cmmlu_data/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/cmmlu_data/readme.md


--------------------------------------------------------------------------------
/evaluation/cmmlu/eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/eval.py


--------------------------------------------------------------------------------
/evaluation/cmmlu/evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/evaluator.py


--------------------------------------------------------------------------------
/evaluation/cmmlu/llama2_evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/llama2_evaluator.py


--------------------------------------------------------------------------------
/evaluation/cmmlu/run_cmmlu.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/cmmlu/run_cmmlu.sh


--------------------------------------------------------------------------------
/evaluation/gpt4/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/gpt4/README.md


--------------------------------------------------------------------------------
/evaluation/gpt4/benchmark.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/evaluation/gpt4/benchmark.jsonl


--------------------------------------------------------------------------------
/examples/.ipynb_checkpoints/batch_inference-checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/.ipynb_checkpoints/batch_inference-checkpoint.py


--------------------------------------------------------------------------------
/examples/.ipynb_checkpoints/train_ppo-checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/.ipynb_checkpoints/train_ppo-checkpoint.py


--------------------------------------------------------------------------------
/examples/batch_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/batch_inference.py


--------------------------------------------------------------------------------
/examples/interactive_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/interactive_chat.py


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/HumanEval-instruction-llama-checkpoint.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/HumanEval-instruction-llama-checkpoint.jsonl


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/ana-checkpoint.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/ana-checkpoint.py


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/build_openrlhf-checkpoint.sh:
--------------------------------------------------------------------------------
1 | set -x
2 | 
3 | pip install --user ../../ 


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/infer-checkpoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/infer-checkpoint.sh


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/infer_rm-checkpoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/infer_rm-checkpoint.sh


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/preference_pair_new-checkpoint.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/preference_pair_new-checkpoint.jsonl


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/train_ppo_llama-checkpoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_ppo_llama-checkpoint.sh


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/train_rm_llama-checkpoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_rm_llama-checkpoint.sh


--------------------------------------------------------------------------------
/examples/scripts/.ipynb_checkpoints/train_sft_llama-checkpoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/.ipynb_checkpoints/train_sft_llama-checkpoint.sh


--------------------------------------------------------------------------------
/examples/scripts/ana.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/ana.py


--------------------------------------------------------------------------------
/examples/scripts/build_openrlhf.sh:
--------------------------------------------------------------------------------
1 | set -x
2 | 
3 | pip install --user ../../ 


--------------------------------------------------------------------------------
/examples/scripts/change_LF_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_LF_rm.py


--------------------------------------------------------------------------------
/examples/scripts/change_ans.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_ans.py


--------------------------------------------------------------------------------
/examples/scripts/change_format.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_format.py


--------------------------------------------------------------------------------
/examples/scripts/change_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/change_rm.py


--------------------------------------------------------------------------------
/examples/scripts/check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/check.py


--------------------------------------------------------------------------------
/examples/scripts/check_tokens.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/check_tokens.py


--------------------------------------------------------------------------------
/examples/scripts/combine_train_ana.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/combine_train_ana.sh


--------------------------------------------------------------------------------
/examples/scripts/convert.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/convert.py


--------------------------------------------------------------------------------
/examples/scripts/docker_run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/docker_run.sh


--------------------------------------------------------------------------------
/examples/scripts/dup.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/dup.sh


--------------------------------------------------------------------------------
/examples/scripts/infer.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/infer.sh


--------------------------------------------------------------------------------
/examples/scripts/infer_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/infer_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/interactive_chat_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/interactive_chat_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/nvidia_docker_install.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/nvidia_docker_install.sh


--------------------------------------------------------------------------------
/examples/scripts/obtain.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/obtain.py


--------------------------------------------------------------------------------
/examples/scripts/remove_temp.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/remove_temp.py


--------------------------------------------------------------------------------
/examples/scripts/reward_ana.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/reward_ana.py


--------------------------------------------------------------------------------
/examples/scripts/sel.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/sel.py


--------------------------------------------------------------------------------
/examples/scripts/train_conditional_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_conditional_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_continue_pretrain_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_continue_pretrain_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_dpo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_dpo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_dpo_llama_34b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_dpo_llama_34b.sh


--------------------------------------------------------------------------------
/examples/scripts/train_knowledge_distillation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_knowledge_distillation.sh


--------------------------------------------------------------------------------
/examples/scripts/train_kto_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_kto_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_llama_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_llama_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_rejection_sampling_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_rejection_sampling_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_rm_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_rm_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_jamba_lora.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_jamba_lora.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_mixtral_lora.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/scripts/train_sft_mixtral_lora.sh


--------------------------------------------------------------------------------
/examples/train_dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_dpo.py


--------------------------------------------------------------------------------
/examples/train_kd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_kd.py


--------------------------------------------------------------------------------
/examples/train_kto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_kto.py


--------------------------------------------------------------------------------
/examples/train_ppo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_ppo.py


--------------------------------------------------------------------------------
/examples/train_ppo_ray.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_ppo_ray.py


--------------------------------------------------------------------------------
/examples/train_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_rm.py


--------------------------------------------------------------------------------
/examples/train_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/examples/train_sft.py


--------------------------------------------------------------------------------
/openrlhf.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/PKG-INFO


--------------------------------------------------------------------------------
/openrlhf.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/SOURCES.txt


--------------------------------------------------------------------------------
/openrlhf.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/openrlhf.egg-info/requires.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf.egg-info/requires.txt


--------------------------------------------------------------------------------
/openrlhf.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | openrlhf
2 | 


--------------------------------------------------------------------------------
/openrlhf/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/.DS_Store


--------------------------------------------------------------------------------
/openrlhf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__init__.py


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/prompts_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/prompts_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/sft_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/unpaired_preference_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/unpaired_preference_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/datasets/utils.py


--------------------------------------------------------------------------------
/openrlhf/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__init__.py


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/actor.py


--------------------------------------------------------------------------------
/openrlhf/models/loss.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/loss.py


--------------------------------------------------------------------------------
/openrlhf/models/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/model.py


--------------------------------------------------------------------------------
/openrlhf/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/models/utils.py


--------------------------------------------------------------------------------
/openrlhf/trainer/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/.DS_Store


--------------------------------------------------------------------------------
/openrlhf/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/dpo_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/kd_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/kto_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/__pycache__/sft_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/dpo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/kd_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/kto_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/experience_maker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/kl_controller.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/replay_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/launcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/launcher.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/ppo_actor.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/ppo_critic.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/vllm_engine.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_worker_wrap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py


--------------------------------------------------------------------------------
/openrlhf/trainer/rm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/rm_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/trainer/sft_trainer.py


--------------------------------------------------------------------------------
/openrlhf/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/deepspeed.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/deepspeed.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/deepspeed_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/deepspeed_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/logging.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/logging.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/processor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/deepspeed.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/deepspeed_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/distributed_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/distributed_util.py


--------------------------------------------------------------------------------
/openrlhf/utils/logging.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/logging.py


--------------------------------------------------------------------------------
/openrlhf/utils/processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/processor.py


--------------------------------------------------------------------------------
/openrlhf/utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf/utils/utils.py


--------------------------------------------------------------------------------
/openrlhf_filter/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/.DS_Store


--------------------------------------------------------------------------------
/openrlhf_filter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf_filter/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/prompts_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/prompts_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/reward_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/reward_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/sft_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/sft_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/prompts_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/prompts_dataset.py


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/sft_dataset.py


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/unpaired_preference_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/unpaired_preference_dataset.py


--------------------------------------------------------------------------------
/openrlhf_filter/datasets/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/datasets/utils.py


--------------------------------------------------------------------------------
/openrlhf_filter/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/models/__pycache__/actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/models/__pycache__/loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/loss.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/models/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/models/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/models/actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/actor.py


--------------------------------------------------------------------------------
/openrlhf_filter/models/loss.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/loss.py


--------------------------------------------------------------------------------
/openrlhf_filter/models/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/model.py


--------------------------------------------------------------------------------
/openrlhf_filter/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/models/utils.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/.DS_Store


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/dpo_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/dpo_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/kd_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/kd_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/kto_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/kto_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/ppo_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/ppo_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/rm_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/rm_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/__pycache__/sft_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/__pycache__/sft_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/dpo_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/kd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/kd_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/kto_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/experience_maker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/experience_maker.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/kl_controller.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/kl_controller.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ppo_utils/replay_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ppo_utils/replay_buffer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/launcher.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/launcher.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/launcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/launcher.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/ppo_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/ppo_actor.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/ppo_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/ppo_critic.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/vllm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/vllm_engine.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/ray/vllm_worker_wrap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/ray/vllm_worker_wrap.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/rm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/rm_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/trainer/sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/trainer/sft_trainer.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/deepspeed.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/deepspeed.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/deepspeed_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/deepspeed_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/distributed_util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/distributed_util.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/logging.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/logging.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/processor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/processor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf_filter/utils/deepspeed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/deepspeed.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/deepspeed_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/deepspeed_utils.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/distributed_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/distributed_util.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/logging.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/logging.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/processor.py


--------------------------------------------------------------------------------
/openrlhf_filter/utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/openrlhf_filter/utils/utils.py


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/requirements.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/swtheing/PF-PPO-RLHF/HEAD/setup.py


--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 0.2.8


--------------------------------------------------------------------------------