├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── README_for_OpenRLHF.md ├── README_for_Satori.md ├── dockerfile ├── Dockerfile └── docker-entrypoint.sh ├── docs ├── logo.png ├── openrlhf_architecture.svg └── ppo_examples.md ├── examples ├── python │ ├── agent_func.py │ └── reward_func.py ├── satori │ ├── train.sh │ └── train_multi.sh └── scripts │ ├── ckpt_ds_zero_to_universal.sh │ ├── docker_run.sh │ ├── nvidia_docker_install.sh │ ├── serve_remote_rm.sh │ ├── train_conditional_llama.sh │ ├── train_dapo_ray_hybrid_engine.sh │ ├── train_dpo_llama.sh │ ├── train_dpo_ring_llama.sh │ ├── train_grpo_ray_hybrid_engine.sh │ ├── train_grpo_ray_hybrid_engine_agent.sh │ ├── train_iterative_dpo_llama.sh │ ├── train_knowledge_distillation.sh │ ├── train_kto_llama.sh │ ├── train_llama_slurm.sh │ ├── train_ppo_llama_ray.sh │ ├── train_ppo_llama_ray_70b.sh │ ├── train_ppo_llama_ray_hybrid_engine.sh │ ├── train_ppo_llama_ray_ring.sh │ ├── train_ppo_llama_ray_slurm.sh │ ├── train_ppo_llama_ray_tensor_parallelism.sh │ ├── train_ppo_llama_with_remote_rm.sh │ ├── train_ppo_llama_with_reward_fn.sh │ ├── train_prm_mistral.sh │ ├── train_reinforce_baseline_llama_ray_agent_async.sh │ ├── train_reinforce_baseline_llama_ray_async.sh │ ├── train_reinforce_baseline_llama_ray_hybrid_engine.sh │ ├── train_reinforce_llama_ray_hybrid_engine.sh │ ├── train_rejection_sampling_llama.sh │ ├── train_rm_llama.sh │ ├── train_sft_llama.sh │ ├── train_sft_llama_tensor_parallelism.sh │ └── train_sft_mixtral_lora.sh ├── openrlhf.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt ├── openrlhf ├── __init__.py ├── __pycache__ │ └── __init__.cpython-310.pyc ├── cli │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── train_ppo_ray.cpython-310.pyc │ ├── batch_inference.py │ ├── interactive_chat.py │ ├── lora_combiner.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── process_reward_dataset.cpython-310.pyc │ │ ├── prompts_dataset.cpython-310.pyc │ │ ├── reward_dataset.cpython-310.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── eval_src │ ├── Evaluator.py │ ├── __pycache__ │ │ ├── Evaluator.cpython-310.pyc │ │ ├── Evaluator.cpython-311.pyc │ │ ├── Evaluator.cpython-312.pyc │ │ └── Evaluator_debugged.cpython-310.pyc │ └── toolkit_for_MATH │ │ ├── __pycache__ │ │ ├── latex_answer_check.cpython-310.pyc │ │ ├── latex_answer_check.cpython-311.pyc │ │ ├── latex_answer_check.cpython-312.pyc │ │ ├── parsing_lib.cpython-310.pyc │ │ ├── parsing_lib.cpython-311.pyc │ │ └── parsing_lib.cpython-312.pyc │ │ ├── custom_toolkit │ │ ├── __pycache__ │ │ │ ├── funcs.cpython-311.pyc │ │ │ └── latex.cpython-311.pyc │ │ ├── funcs.py │ │ └── latex.py │ │ ├── latex_answer_check.py │ │ ├── parsing_lib.py │ │ └── simple_answer_check.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── loss.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── ring_attn_utils.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── ppo_trainer.cpython-310.pyc │ │ └── rm_trainer.cpython-310.pyc │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_trainer_async.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── experience_maker.cpython-310.pyc │ │ │ ├── kl_controller.cpython-310.pyc │ │ │ └── replay_buffer.cpython-310.pyc │ │ ├── experience_maker.py │ │ ├── experience_maker_async.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── launcher.cpython-310.pyc │ │ │ ├── ppo_actor.cpython-310.pyc │ │ │ ├── ppo_critic.cpython-310.pyc │ │ │ ├── utils.cpython-310.pyc │ │ │ ├── vllm_engine.cpython-310.pyc │ │ │ └── vllm_worker_wrap.cpython-310.pyc │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ ├── vllm_engine_async.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── distributed_sampler.cpython-310.pyc │ ├── distributed_util.cpython-310.pyc │ ├── logging_utils.cpython-310.pyc │ ├── processor.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── deepspeed │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── deepspeed.cpython-310.pyc │ │ └── deepspeed_utils.cpython-310.pyc │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.py └── version.txt /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README.md -------------------------------------------------------------------------------- /README_for_OpenRLHF.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README_for_OpenRLHF.md -------------------------------------------------------------------------------- /README_for_Satori.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README_for_Satori.md -------------------------------------------------------------------------------- /dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/dockerfile/Dockerfile -------------------------------------------------------------------------------- /dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/openrlhf_architecture.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/openrlhf_architecture.svg -------------------------------------------------------------------------------- /docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/ppo_examples.md -------------------------------------------------------------------------------- /examples/python/agent_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/python/agent_func.py -------------------------------------------------------------------------------- /examples/python/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/python/reward_func.py -------------------------------------------------------------------------------- /examples/satori/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/satori/train.sh -------------------------------------------------------------------------------- /examples/satori/train_multi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/satori/train_multi.sh -------------------------------------------------------------------------------- /examples/scripts/ckpt_ds_zero_to_universal.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/ckpt_ds_zero_to_universal.sh -------------------------------------------------------------------------------- /examples/scripts/docker_run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/docker_run.sh -------------------------------------------------------------------------------- /examples/scripts/nvidia_docker_install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/nvidia_docker_install.sh -------------------------------------------------------------------------------- /examples/scripts/serve_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/serve_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_conditional_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_conditional_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dapo_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dapo_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_dpo_ring_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dpo_ring_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_grpo_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_grpo_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_grpo_ray_hybrid_engine_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_grpo_ray_hybrid_engine_agent.sh -------------------------------------------------------------------------------- /examples/scripts/train_iterative_dpo_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_iterative_dpo_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_knowledge_distillation.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_knowledge_distillation.sh -------------------------------------------------------------------------------- /examples/scripts/train_kto_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_kto_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_llama_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_llama_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_70b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_ring.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_ring.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_ray_tensor_parallelism.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_tensor_parallelism.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_remote_rm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_with_remote_rm.sh -------------------------------------------------------------------------------- /examples/scripts/train_ppo_llama_with_reward_fn.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_with_reward_fn.sh -------------------------------------------------------------------------------- /examples/scripts/train_prm_mistral.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_prm_mistral.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_baseline_llama_ray_agent_async.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_agent_async.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_baseline_llama_ray_async.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_async.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh -------------------------------------------------------------------------------- /examples/scripts/train_rejection_sampling_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_rejection_sampling_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_rm_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_rm_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_llama.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_llama_tensor_parallelism.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_llama_tensor_parallelism.sh -------------------------------------------------------------------------------- /examples/scripts/train_sft_mixtral_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_mixtral_lora.sh -------------------------------------------------------------------------------- /openrlhf.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/PKG-INFO -------------------------------------------------------------------------------- /openrlhf.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /openrlhf.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /openrlhf.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/requires.txt -------------------------------------------------------------------------------- /openrlhf.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | openrlhf 2 | -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /openrlhf/cli/lora_combiner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/lora_combiner.py -------------------------------------------------------------------------------- /openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/eval_src/Evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/Evaluator.py -------------------------------------------------------------------------------- /openrlhf/eval_src/__pycache__/Evaluator.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/__pycache__/Evaluator.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-311.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/__pycache__/Evaluator.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-312.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/__pycache__/Evaluator_debugged.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator_debugged.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-311.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-312.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-311.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-312.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/funcs.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/funcs.cpython-311.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/latex.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/latex.cpython-311.pyc -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/funcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/funcs.py -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/latex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/latex.py -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/latex_answer_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/latex_answer_check.py -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/parsing_lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/parsing_lib.py -------------------------------------------------------------------------------- /openrlhf/eval_src/toolkit_for_MATH/simple_answer_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/simple_answer_check.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_trainer_async.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/experience_maker_async.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine_async.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_engine_async.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/setup.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.8.2 --------------------------------------------------------------------------------