├── .github └── workflows │ └── pypi-nightly.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── OREO.png ├── Offline Reinforcement Learning for LLM Multi-Step Reasoning.pdf ├── README.md ├── README.openrlhf.md ├── README_zh.md ├── dockerfile ├── Dockerfile ├── docker-entrypoint.sh └── sources.list ├── docs ├── logo.png ├── openrlhf_doc.md ├── openrlhf_doc_cn.md ├── openrlhf_doc_pr.md ├── ppo_examples.md └── ray_architecture.png ├── evaluation ├── ceval │ ├── ceval.py │ ├── ceval_data │ │ └── readme.md │ ├── evaluator.py │ ├── llama_evaluator.py │ ├── run_ceval.sh │ └── subject_mapping.json ├── cmmlu │ ├── __init__.py │ ├── categories.py │ ├── cmmlu_data │ │ └── readme.md │ ├── eval.py │ ├── evaluator.py │ ├── llama2_evaluator.py │ └── run_cmmlu.sh └── gpt4 │ ├── README.md │ └── benchmark.jsonl ├── examples ├── batch_inference.py ├── check_data.py ├── gen_balanced.py ├── gen_balanced_new.py ├── gen_balanced_new_qwen.py ├── interactive_chat.py ├── length_distribution.py ├── merge_lora.py ├── run_gsm8k.py ├── scratch │ ├── a.py │ ├── a.sh │ ├── a.txt │ ├── alfworld_length.py │ ├── b.sh │ ├── balanced_dart.py │ ├── batch_sbs.bak.py │ ├── batch_sbs.py │ ├── batch_sbs_qwen.py │ ├── bon.py │ ├── check_adv.py │ ├── check_alfworld.py │ ├── check_kl_bias.py │ ├── check_last_value.py │ ├── check_prm800k.py │ ├── clip.py │ ├── collect_trajs.py │ ├── compare.py │ ├── construct_dpo_dataset.py │ ├── construct_dpo_dataset_alfworld.py │ ├── construct_sft.py │ ├── construct_sft_from-dsm.py │ ├── delay_run.sh │ ├── kl.py │ ├── load_alfworld.py │ ├── load_dart-math.py │ ├── math_answer_length.py │ ├── mix_dataset.py │ ├── plot_max_step_values.py │ ├── plot_weight_dist.py │ ├── rejection_sampling.py │ ├── rejection_sampling_alfworld.py │ ├── repeat_gen.py │ ├── run_alfworld.py │ ├── run_alfworld_async.py │ ├── run_alfworld_async_with_value.py │ ├── run_dsm.py │ ├── run_qwen.py │ ├── sbs.py │ ├── sbs_1.py │ ├── sbs_async.py │ ├── sbs_qwen.py │ ├── serve_value.py │ ├── test_alfworld.py │ ├── test_alphamath.py │ ├── test_converge.py │ ├── test_qwen.py │ ├── test_unbounded.py │ ├── upload_critic.py │ ├── value.py │ ├── value_2.py │ ├── value_alfworld.py │ ├── value_qwen.py │ └── weight_dist.py ├── scripts │ ├── build_openrlhf.sh │ ├── docker_run.sh │ ├── nvidia_docker_install.sh │ ├── train_conditional_llama.sh │ ├── train_continue_pretrain_llama.sh │ ├── train_dpo_llama.sh │ ├── train_dpo_llama_34b.sh │ ├── train_knowledge_distillation.sh │ ├── train_kto_llama.sh │ ├── train_llama_slurm.sh │ ├── train_oreo.sh │ ├── train_oreo_alfworld.sh │ ├── train_oreo_alfworld_sft.sh │ ├── train_oreo_deepseek-math.sh │ ├── train_oreo_sft.sh │ ├── train_ppo_llama.sh │ ├── train_ppo_llama_ray.sh │ ├── train_ppo_llama_ray_70b.sh │ ├── train_ppo_llama_ray_slurm.sh │ ├── train_rejection_sampling_llama.sh │ ├── train_rm_llama.sh │ ├── train_sft_gsm8k.sh │ ├── train_sft_jamba_lora.sh │ ├── train_sft_llama.sh │ └── train_sft_mixtral_lora.sh ├── train_dpo.py ├── train_kd.py ├── train_kto.py ├── train_pcl.py ├── train_ppo.py ├── train_ppo_ray.py ├── train_rm.py └── train_sft.py ├── openrlhf ├── __init__.py ├── datasets │ ├── __init__.py │ ├── answer_extraction.py │ ├── eval │ │ ├── eval_script.py │ │ ├── eval_utils.py │ │ ├── ocwcourses_eval_utils.py │ │ ├── python_executor.py │ │ └── utils.py │ ├── pcl_dataset.py │ ├── pcl_dataset_alfworld.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── actor.py │ ├── actor_critic.bak.py │ ├── actor_critic.py │ ├── loss.py │ ├── model.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── pcl_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── ray │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── deepspeed.py │ ├── deepspeed_utils.py │ ├── distributed_util.py │ ├── logging.py │ ├── processor.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py └── version.txt /.github/workflows/pypi-nightly.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/.github/workflows/pypi-nightly.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/LICENSE -------------------------------------------------------------------------------- /OREO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/OREO.png -------------------------------------------------------------------------------- /Offline Reinforcement Learning for LLM Multi-Step Reasoning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/Offline Reinforcement Learning for LLM Multi-Step Reasoning.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/README.md -------------------------------------------------------------------------------- /README.openrlhf.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/README.openrlhf.md -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/README_zh.md -------------------------------------------------------------------------------- /dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/dockerfile/Dockerfile -------------------------------------------------------------------------------- /dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /dockerfile/sources.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/dockerfile/sources.list -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/openrlhf_doc.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/openrlhf_doc.md -------------------------------------------------------------------------------- /docs/openrlhf_doc_cn.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/openrlhf_doc_cn.md -------------------------------------------------------------------------------- /docs/openrlhf_doc_pr.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/openrlhf_doc_pr.md -------------------------------------------------------------------------------- /docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/ppo_examples.md -------------------------------------------------------------------------------- /docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/docs/ray_architecture.png -------------------------------------------------------------------------------- /evaluation/ceval/ceval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/ceval.py -------------------------------------------------------------------------------- /evaluation/ceval/ceval_data/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/ceval_data/readme.md -------------------------------------------------------------------------------- /evaluation/ceval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/evaluator.py -------------------------------------------------------------------------------- /evaluation/ceval/llama_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/llama_evaluator.py -------------------------------------------------------------------------------- /evaluation/ceval/run_ceval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/run_ceval.sh -------------------------------------------------------------------------------- /evaluation/ceval/subject_mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/ceval/subject_mapping.json -------------------------------------------------------------------------------- /evaluation/cmmlu/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/cmmlu/categories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/categories.py -------------------------------------------------------------------------------- /evaluation/cmmlu/cmmlu_data/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/cmmlu_data/readme.md -------------------------------------------------------------------------------- /evaluation/cmmlu/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/eval.py -------------------------------------------------------------------------------- /evaluation/cmmlu/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/evaluator.py -------------------------------------------------------------------------------- /evaluation/cmmlu/llama2_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/llama2_evaluator.py -------------------------------------------------------------------------------- /evaluation/cmmlu/run_cmmlu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/cmmlu/run_cmmlu.sh -------------------------------------------------------------------------------- /evaluation/gpt4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/gpt4/README.md -------------------------------------------------------------------------------- /evaluation/gpt4/benchmark.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/evaluation/gpt4/benchmark.jsonl -------------------------------------------------------------------------------- /examples/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/batch_inference.py -------------------------------------------------------------------------------- /examples/check_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/check_data.py -------------------------------------------------------------------------------- /examples/gen_balanced.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/gen_balanced.py -------------------------------------------------------------------------------- /examples/gen_balanced_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/gen_balanced_new.py -------------------------------------------------------------------------------- /examples/gen_balanced_new_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/gen_balanced_new_qwen.py -------------------------------------------------------------------------------- /examples/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/interactive_chat.py -------------------------------------------------------------------------------- /examples/length_distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/length_distribution.py -------------------------------------------------------------------------------- /examples/merge_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/merge_lora.py -------------------------------------------------------------------------------- /examples/run_gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/run_gsm8k.py -------------------------------------------------------------------------------- /examples/scratch/a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/a.py -------------------------------------------------------------------------------- /examples/scratch/a.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/a.sh -------------------------------------------------------------------------------- /examples/scratch/a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/a.txt -------------------------------------------------------------------------------- /examples/scratch/alfworld_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/alfworld_length.py -------------------------------------------------------------------------------- /examples/scratch/b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/b.sh -------------------------------------------------------------------------------- /examples/scratch/balanced_dart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/balanced_dart.py -------------------------------------------------------------------------------- /examples/scratch/batch_sbs.bak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/batch_sbs.bak.py -------------------------------------------------------------------------------- /examples/scratch/batch_sbs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/batch_sbs.py -------------------------------------------------------------------------------- /examples/scratch/batch_sbs_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/batch_sbs_qwen.py -------------------------------------------------------------------------------- /examples/scratch/bon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/bon.py -------------------------------------------------------------------------------- /examples/scratch/check_adv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/check_adv.py -------------------------------------------------------------------------------- /examples/scratch/check_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/check_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/check_kl_bias.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/check_kl_bias.py -------------------------------------------------------------------------------- /examples/scratch/check_last_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/check_last_value.py -------------------------------------------------------------------------------- /examples/scratch/check_prm800k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/check_prm800k.py -------------------------------------------------------------------------------- /examples/scratch/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/clip.py -------------------------------------------------------------------------------- /examples/scratch/collect_trajs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/collect_trajs.py -------------------------------------------------------------------------------- /examples/scratch/compare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/compare.py -------------------------------------------------------------------------------- /examples/scratch/construct_dpo_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/construct_dpo_dataset.py -------------------------------------------------------------------------------- /examples/scratch/construct_dpo_dataset_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/construct_dpo_dataset_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/construct_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/construct_sft.py -------------------------------------------------------------------------------- /examples/scratch/construct_sft_from-dsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/construct_sft_from-dsm.py -------------------------------------------------------------------------------- /examples/scratch/delay_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/delay_run.sh -------------------------------------------------------------------------------- /examples/scratch/kl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/kl.py -------------------------------------------------------------------------------- /examples/scratch/load_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/load_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/load_dart-math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/load_dart-math.py -------------------------------------------------------------------------------- /examples/scratch/math_answer_length.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/math_answer_length.py -------------------------------------------------------------------------------- /examples/scratch/mix_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/mix_dataset.py -------------------------------------------------------------------------------- /examples/scratch/plot_max_step_values.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/plot_max_step_values.py -------------------------------------------------------------------------------- /examples/scratch/plot_weight_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/plot_weight_dist.py -------------------------------------------------------------------------------- /examples/scratch/rejection_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/rejection_sampling.py -------------------------------------------------------------------------------- /examples/scratch/rejection_sampling_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/rejection_sampling_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/repeat_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/repeat_gen.py -------------------------------------------------------------------------------- /examples/scratch/run_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/run_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/run_alfworld_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/run_alfworld_async.py -------------------------------------------------------------------------------- /examples/scratch/run_alfworld_async_with_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/run_alfworld_async_with_value.py -------------------------------------------------------------------------------- /examples/scratch/run_dsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/run_dsm.py -------------------------------------------------------------------------------- /examples/scratch/run_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/run_qwen.py -------------------------------------------------------------------------------- /examples/scratch/sbs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/sbs.py -------------------------------------------------------------------------------- /examples/scratch/sbs_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/sbs_1.py -------------------------------------------------------------------------------- /examples/scratch/sbs_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/sbs_async.py -------------------------------------------------------------------------------- /examples/scratch/sbs_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/sbs_qwen.py -------------------------------------------------------------------------------- /examples/scratch/serve_value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/serve_value.py -------------------------------------------------------------------------------- /examples/scratch/test_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/test_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/test_alphamath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/test_alphamath.py -------------------------------------------------------------------------------- /examples/scratch/test_converge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/test_converge.py -------------------------------------------------------------------------------- /examples/scratch/test_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/test_qwen.py -------------------------------------------------------------------------------- /examples/scratch/test_unbounded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/test_unbounded.py -------------------------------------------------------------------------------- /examples/scratch/upload_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/upload_critic.py -------------------------------------------------------------------------------- /examples/scratch/value.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/value.py -------------------------------------------------------------------------------- /examples/scratch/value_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/value_2.py -------------------------------------------------------------------------------- /examples/scratch/value_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/value_alfworld.py -------------------------------------------------------------------------------- /examples/scratch/value_qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/value_qwen.py -------------------------------------------------------------------------------- /examples/scratch/weight_dist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scratch/weight_dist.py -------------------------------------------------------------------------------- /examples/scripts/build_openrlhf.sh: -------------------------------------------------------------------------------- 1 | set -x 2 | 3 | pip install --user ../../ -------------------------------------------------------------------------------- /examples/scripts/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/docker_run.sh -------------------------------------------------------------------------------- /examples/scripts/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/nvidia_docker_install.sh -------------------------------------------------------------------------------- /examples/scripts/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_conditional_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_continue_pretrain_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_continue_pretrain_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama_34b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_dpo_llama_34b.sh -------------------------------------------------------------------------------- /examples/scripts/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /examples/scripts/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_kto_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_llama_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_oreo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_oreo.sh -------------------------------------------------------------------------------- /examples/scripts/train_oreo_alfworld.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_oreo_alfworld.sh -------------------------------------------------------------------------------- /examples/scripts/train_oreo_alfworld_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_oreo_alfworld_sft.sh -------------------------------------------------------------------------------- /examples/scripts/train_oreo_deepseek-math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_oreo_deepseek-math.sh -------------------------------------------------------------------------------- /examples/scripts/train_oreo_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_oreo_sft.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_ppo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_rm_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_gsm8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_sft_gsm8k.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_jamba_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_sft_jamba_lora.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_sft_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/scripts/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /examples/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_dpo.py -------------------------------------------------------------------------------- /examples/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_kd.py -------------------------------------------------------------------------------- /examples/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_kto.py -------------------------------------------------------------------------------- /examples/train_pcl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_pcl.py -------------------------------------------------------------------------------- /examples/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_ppo.py -------------------------------------------------------------------------------- /examples/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_ppo_ray.py -------------------------------------------------------------------------------- /examples/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_rm.py -------------------------------------------------------------------------------- /examples/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/examples/train_sft.py -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/answer_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/answer_extraction.py -------------------------------------------------------------------------------- /openrlhf/datasets/eval/eval_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/eval/eval_script.py -------------------------------------------------------------------------------- /openrlhf/datasets/eval/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/eval/eval_utils.py -------------------------------------------------------------------------------- /openrlhf/datasets/eval/ocwcourses_eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/eval/ocwcourses_eval_utils.py -------------------------------------------------------------------------------- /openrlhf/datasets/eval/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/eval/python_executor.py -------------------------------------------------------------------------------- /openrlhf/datasets/eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/eval/utils.py -------------------------------------------------------------------------------- /openrlhf/datasets/pcl_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/pcl_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/pcl_dataset_alfworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/pcl_dataset_alfworld.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/actor_critic.bak.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/actor_critic.bak.py -------------------------------------------------------------------------------- /openrlhf/models/actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/actor_critic.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/pcl_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/pcl_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/logging.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jwhj/OREO/HEAD/setup.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.2.9 --------------------------------------------------------------------------------