├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── README_for_OpenRLHF.md
├── README_for_Satori.md
├── dockerfile
    ├── Dockerfile
    └── docker-entrypoint.sh
├── docs
    ├── logo.png
    ├── openrlhf_architecture.svg
    └── ppo_examples.md
├── examples
    ├── python
    │   ├── agent_func.py
    │   └── reward_func.py
    ├── satori
    │   ├── train.sh
    │   └── train_multi.sh
    └── scripts
    │   ├── ckpt_ds_zero_to_universal.sh
    │   ├── docker_run.sh
    │   ├── nvidia_docker_install.sh
    │   ├── serve_remote_rm.sh
    │   ├── train_conditional_llama.sh
    │   ├── train_dapo_ray_hybrid_engine.sh
    │   ├── train_dpo_llama.sh
    │   ├── train_dpo_ring_llama.sh
    │   ├── train_grpo_ray_hybrid_engine.sh
    │   ├── train_grpo_ray_hybrid_engine_agent.sh
    │   ├── train_iterative_dpo_llama.sh
    │   ├── train_knowledge_distillation.sh
    │   ├── train_kto_llama.sh
    │   ├── train_llama_slurm.sh
    │   ├── train_ppo_llama_ray.sh
    │   ├── train_ppo_llama_ray_70b.sh
    │   ├── train_ppo_llama_ray_hybrid_engine.sh
    │   ├── train_ppo_llama_ray_ring.sh
    │   ├── train_ppo_llama_ray_slurm.sh
    │   ├── train_ppo_llama_ray_tensor_parallelism.sh
    │   ├── train_ppo_llama_with_remote_rm.sh
    │   ├── train_ppo_llama_with_reward_fn.sh
    │   ├── train_prm_mistral.sh
    │   ├── train_reinforce_baseline_llama_ray_agent_async.sh
    │   ├── train_reinforce_baseline_llama_ray_async.sh
    │   ├── train_reinforce_baseline_llama_ray_hybrid_engine.sh
    │   ├── train_reinforce_llama_ray_hybrid_engine.sh
    │   ├── train_rejection_sampling_llama.sh
    │   ├── train_rm_llama.sh
    │   ├── train_sft_llama.sh
    │   ├── train_sft_llama_tensor_parallelism.sh
    │   └── train_sft_mixtral_lora.sh
├── openrlhf.egg-info
    ├── PKG-INFO
    ├── SOURCES.txt
    ├── dependency_links.txt
    ├── requires.txt
    └── top_level.txt
├── openrlhf
    ├── __init__.py
    ├── __pycache__
    │   └── __init__.cpython-310.pyc
    ├── cli
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   └── train_ppo_ray.cpython-310.pyc
    │   ├── batch_inference.py
    │   ├── interactive_chat.py
    │   ├── lora_combiner.py
    │   ├── serve_rm.py
    │   ├── train_dpo.py
    │   ├── train_kd.py
    │   ├── train_kto.py
    │   ├── train_ppo_ray.py
    │   ├── train_prm.py
    │   ├── train_rm.py
    │   └── train_sft.py
    ├── datasets
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── process_reward_dataset.cpython-310.pyc
    │   │   ├── prompts_dataset.cpython-310.pyc
    │   │   ├── reward_dataset.cpython-310.pyc
    │   │   ├── sft_dataset.cpython-310.pyc
    │   │   ├── unpaired_preference_dataset.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── process_reward_dataset.py
    │   ├── prompts_dataset.py
    │   ├── reward_dataset.py
    │   ├── sft_dataset.py
    │   ├── unpaired_preference_dataset.py
    │   └── utils.py
    ├── eval_src
    │   ├── Evaluator.py
    │   ├── __pycache__
    │   │   ├── Evaluator.cpython-310.pyc
    │   │   ├── Evaluator.cpython-311.pyc
    │   │   ├── Evaluator.cpython-312.pyc
    │   │   └── Evaluator_debugged.cpython-310.pyc
    │   └── toolkit_for_MATH
    │   │   ├── __pycache__
    │   │       ├── latex_answer_check.cpython-310.pyc
    │   │       ├── latex_answer_check.cpython-311.pyc
    │   │       ├── latex_answer_check.cpython-312.pyc
    │   │       ├── parsing_lib.cpython-310.pyc
    │   │       ├── parsing_lib.cpython-311.pyc
    │   │       └── parsing_lib.cpython-312.pyc
    │   │   ├── custom_toolkit
    │   │       ├── __pycache__
    │   │       │   ├── funcs.cpython-311.pyc
    │   │       │   └── latex.cpython-311.pyc
    │   │       ├── funcs.py
    │   │       └── latex.py
    │   │   ├── latex_answer_check.py
    │   │   ├── parsing_lib.py
    │   │   └── simple_answer_check.py
    ├── models
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── actor.cpython-310.pyc
    │   │   ├── loss.cpython-310.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   ├── ring_attn_utils.cpython-310.pyc
    │   │   └── utils.cpython-310.pyc
    │   ├── actor.py
    │   ├── loss.py
    │   ├── model.py
    │   ├── ring_attn_utils.py
    │   └── utils.py
    ├── trainer
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── ppo_trainer.cpython-310.pyc
    │   │   └── rm_trainer.cpython-310.pyc
    │   ├── dpo_trainer.py
    │   ├── kd_trainer.py
    │   ├── kto_trainer.py
    │   ├── ppo_trainer.py
    │   ├── ppo_trainer_async.py
    │   ├── ppo_utils
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── experience_maker.cpython-310.pyc
    │   │   │   ├── kl_controller.cpython-310.pyc
    │   │   │   └── replay_buffer.cpython-310.pyc
    │   │   ├── experience_maker.py
    │   │   ├── experience_maker_async.py
    │   │   ├── kl_controller.py
    │   │   └── replay_buffer.py
    │   ├── prm_trainer.py
    │   ├── ray
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── launcher.cpython-310.pyc
    │   │   │   ├── ppo_actor.cpython-310.pyc
    │   │   │   ├── ppo_critic.cpython-310.pyc
    │   │   │   ├── utils.cpython-310.pyc
    │   │   │   ├── vllm_engine.cpython-310.pyc
    │   │   │   └── vllm_worker_wrap.cpython-310.pyc
    │   │   ├── launcher.py
    │   │   ├── ppo_actor.py
    │   │   ├── ppo_critic.py
    │   │   ├── utils.py
    │   │   ├── vllm_engine.py
    │   │   ├── vllm_engine_async.py
    │   │   └── vllm_worker_wrap.py
    │   ├── rm_trainer.py
    │   └── sft_trainer.py
    └── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-310.pyc
    │       ├── distributed_sampler.cpython-310.pyc
    │       ├── distributed_util.cpython-310.pyc
    │       ├── logging_utils.cpython-310.pyc
    │       ├── processor.cpython-310.pyc
    │       └── utils.cpython-310.pyc
    │   ├── deepspeed
    │       ├── __init__.py
    │       ├── __pycache__
    │       │   ├── __init__.cpython-310.pyc
    │       │   ├── deepspeed.cpython-310.pyc
    │       │   └── deepspeed_utils.cpython-310.pyc
    │       ├── deepspeed.py
    │       └── deepspeed_utils.py
    │   ├── distributed_sampler.py
    │   ├── distributed_util.py
    │   ├── logging_utils.py
    │   ├── processor.py
    │   ├── remote_rm_utils.py
    │   └── utils.py
├── pyproject.toml
├── requirements.txt
├── setup.py
└── version.txt


/CONTRIBUTING.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/CONTRIBUTING.md


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README.md


--------------------------------------------------------------------------------
/README_for_OpenRLHF.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README_for_OpenRLHF.md


--------------------------------------------------------------------------------
/README_for_Satori.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/README_for_Satori.md


--------------------------------------------------------------------------------
/dockerfile/Dockerfile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/dockerfile/Dockerfile


--------------------------------------------------------------------------------
/dockerfile/docker-entrypoint.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/dockerfile/docker-entrypoint.sh


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/logo.png


--------------------------------------------------------------------------------
/docs/openrlhf_architecture.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/openrlhf_architecture.svg


--------------------------------------------------------------------------------
/docs/ppo_examples.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/docs/ppo_examples.md


--------------------------------------------------------------------------------
/examples/python/agent_func.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/python/agent_func.py


--------------------------------------------------------------------------------
/examples/python/reward_func.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/python/reward_func.py


--------------------------------------------------------------------------------
/examples/satori/train.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/satori/train.sh


--------------------------------------------------------------------------------
/examples/satori/train_multi.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/satori/train_multi.sh


--------------------------------------------------------------------------------
/examples/scripts/ckpt_ds_zero_to_universal.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/ckpt_ds_zero_to_universal.sh


--------------------------------------------------------------------------------
/examples/scripts/docker_run.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/docker_run.sh


--------------------------------------------------------------------------------
/examples/scripts/nvidia_docker_install.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/nvidia_docker_install.sh


--------------------------------------------------------------------------------
/examples/scripts/serve_remote_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/serve_remote_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_conditional_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_conditional_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_dapo_ray_hybrid_engine.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dapo_ray_hybrid_engine.sh


--------------------------------------------------------------------------------
/examples/scripts/train_dpo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dpo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_dpo_ring_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_dpo_ring_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_grpo_ray_hybrid_engine.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_grpo_ray_hybrid_engine.sh


--------------------------------------------------------------------------------
/examples/scripts/train_grpo_ray_hybrid_engine_agent.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_grpo_ray_hybrid_engine_agent.sh


--------------------------------------------------------------------------------
/examples/scripts/train_iterative_dpo_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_iterative_dpo_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_knowledge_distillation.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_knowledge_distillation.sh


--------------------------------------------------------------------------------
/examples/scripts/train_kto_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_kto_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_llama_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_llama_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_70b.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_70b.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_hybrid_engine.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_hybrid_engine.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_ring.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_ring.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_slurm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_ray_tensor_parallelism.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_ray_tensor_parallelism.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_with_remote_rm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_with_remote_rm.sh


--------------------------------------------------------------------------------
/examples/scripts/train_ppo_llama_with_reward_fn.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_ppo_llama_with_reward_fn.sh


--------------------------------------------------------------------------------
/examples/scripts/train_prm_mistral.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_prm_mistral.sh


--------------------------------------------------------------------------------
/examples/scripts/train_reinforce_baseline_llama_ray_agent_async.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_agent_async.sh


--------------------------------------------------------------------------------
/examples/scripts/train_reinforce_baseline_llama_ray_async.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_async.sh


--------------------------------------------------------------------------------
/examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_baseline_llama_ray_hybrid_engine.sh


--------------------------------------------------------------------------------
/examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_reinforce_llama_ray_hybrid_engine.sh


--------------------------------------------------------------------------------
/examples/scripts/train_rejection_sampling_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_rejection_sampling_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_rm_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_rm_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_llama.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_llama.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_llama_tensor_parallelism.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_llama_tensor_parallelism.sh


--------------------------------------------------------------------------------
/examples/scripts/train_sft_mixtral_lora.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/examples/scripts/train_sft_mixtral_lora.sh


--------------------------------------------------------------------------------
/openrlhf.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/PKG-INFO


--------------------------------------------------------------------------------
/openrlhf.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/SOURCES.txt


--------------------------------------------------------------------------------
/openrlhf.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/openrlhf.egg-info/requires.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf.egg-info/requires.txt


--------------------------------------------------------------------------------
/openrlhf.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | openrlhf
2 | 


--------------------------------------------------------------------------------
/openrlhf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/cli/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/openrlhf/cli/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/__pycache__/train_ppo_ray.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/cli/batch_inference.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/batch_inference.py


--------------------------------------------------------------------------------
/openrlhf/cli/interactive_chat.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/interactive_chat.py


--------------------------------------------------------------------------------
/openrlhf/cli/lora_combiner.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/lora_combiner.py


--------------------------------------------------------------------------------
/openrlhf/cli/serve_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/serve_rm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_dpo.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_kd.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_kd.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_kto.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_kto.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_ppo_ray.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_ppo_ray.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_prm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_prm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_rm.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_rm.py


--------------------------------------------------------------------------------
/openrlhf/cli/train_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/cli/train_sft.py


--------------------------------------------------------------------------------
/openrlhf/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__init__.py


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/datasets/process_reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/process_reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/prompts_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/prompts_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/reward_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/reward_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/sft_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/sft_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/unpaired_preference_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/unpaired_preference_dataset.py


--------------------------------------------------------------------------------
/openrlhf/datasets/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/datasets/utils.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/Evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/Evaluator.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/__pycache__/Evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/__pycache__/Evaluator.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-311.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/__pycache__/Evaluator.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator.cpython-312.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/__pycache__/Evaluator_debugged.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/__pycache__/Evaluator_debugged.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-311.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/latex_answer_check.cpython-312.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-311.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/__pycache__/parsing_lib.cpython-312.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/funcs.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/funcs.cpython-311.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/latex.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/__pycache__/latex.cpython-311.pyc


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/funcs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/funcs.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/latex.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/custom_toolkit/latex.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/latex_answer_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/latex_answer_check.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/parsing_lib.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/parsing_lib.py


--------------------------------------------------------------------------------
/openrlhf/eval_src/toolkit_for_MATH/simple_answer_check.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/eval_src/toolkit_for_MATH/simple_answer_check.py


--------------------------------------------------------------------------------
/openrlhf/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__init__.py


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/loss.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/models/actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/actor.py


--------------------------------------------------------------------------------
/openrlhf/models/loss.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/loss.py


--------------------------------------------------------------------------------
/openrlhf/models/model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/model.py


--------------------------------------------------------------------------------
/openrlhf/models/ring_attn_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/ring_attn_utils.py


--------------------------------------------------------------------------------
/openrlhf/models/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/models/utils.py


--------------------------------------------------------------------------------
/openrlhf/trainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/ppo_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/__pycache__/rm_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/dpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/dpo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kd_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/kd_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/kto_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/kto_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_trainer_async.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_trainer_async.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/experience_maker.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/kl_controller.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/__pycache__/replay_buffer.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/experience_maker.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/experience_maker_async.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/experience_maker_async.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/kl_controller.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ppo_utils/replay_buffer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/prm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/prm_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__init__.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/launcher.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/ppo_actor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/ppo_critic.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/vllm_engine.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/__pycache__/vllm_worker_wrap.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/launcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/launcher.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_actor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/ppo_actor.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/ppo_critic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/ppo_critic.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/utils.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_engine.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_engine.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_engine_async.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_engine_async.py


--------------------------------------------------------------------------------
/openrlhf/trainer/ray/vllm_worker_wrap.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py


--------------------------------------------------------------------------------
/openrlhf/trainer/rm_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/rm_trainer.py


--------------------------------------------------------------------------------
/openrlhf/trainer/sft_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/trainer/sft_trainer.py


--------------------------------------------------------------------------------
/openrlhf/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__init__.py


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/distributed_sampler.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/distributed_util.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/processor.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__init__.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/__pycache__/deepspeed_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/deepspeed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/deepspeed.py


--------------------------------------------------------------------------------
/openrlhf/utils/deepspeed/deepspeed_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/distributed_sampler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/distributed_sampler.py


--------------------------------------------------------------------------------
/openrlhf/utils/distributed_util.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/distributed_util.py


--------------------------------------------------------------------------------
/openrlhf/utils/logging_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/logging_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/processor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/processor.py


--------------------------------------------------------------------------------
/openrlhf/utils/remote_rm_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/remote_rm_utils.py


--------------------------------------------------------------------------------
/openrlhf/utils/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/openrlhf/utils/utils.py


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/pyproject.toml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/requirements.txt


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satori-reasoning/Satori/HEAD/setup.py


--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 0.8.2


--------------------------------------------------------------------------------