├── LICENSE ├── README.md ├── assets └── main_fig.png ├── report └── OpenRFT.pdf └── src ├── Evaluate ├── evaluate.py ├── inference.py ├── run_vllm.sh └── test_data │ ├── GB1_ftness_prediction.json │ ├── chemical_calculation.json │ ├── diffusion_rate_analysis.json │ ├── high_school_physics_calculation.json │ ├── material_calculation.json │ ├── molecule_structure_prediction.json │ ├── perovskite_stability_prediction.json │ └── retrosynthesis.json ├── PPO ├── data │ ├── processed │ │ ├── GB1_ftness_prediction.jsonl │ │ ├── chemical_calculation.jsonl │ │ ├── diffusion_rate_analysis.jsonl │ │ ├── high_school_physics_calculation.jsonl │ │ ├── material_calculation.jsonl │ │ ├── molecule_structure_prediction.jsonl │ │ ├── perovskite_stability_prediction.jsonl │ │ └── retrosynthesis.jsonl │ └── training_data │ │ ├── GB1_ftness_prediction.json │ │ ├── chemical_calculation.json │ │ ├── diffusion_rate_analysis.json │ │ ├── high_school_physics_calculation.json │ │ ├── material_calculation.json │ │ ├── molecule_structure_prediction.json │ │ ├── perovskite_stability_prediction.json │ │ └── retrosynthesis.json ├── openrlhf │ ├── __init__.py │ ├── cli │ │ ├── __init__.py │ │ ├── batch_inference.py │ │ ├── interactive_chat.py │ │ ├── serve_rm.py │ │ ├── train_dpo.py │ │ ├── train_kd.py │ │ ├── train_kto.py │ │ ├── train_ppo.py │ │ ├── train_ppo_ray.py │ │ ├── train_prm.py │ │ ├── train_rft.py │ │ ├── train_rm.py │ │ └── train_sft.py │ ├── datasets │ │ ├── __init__.py │ │ ├── process_reward_dataset.py │ │ ├── prompts_dataset.py │ │ ├── reward_dataset.py │ │ ├── sft_dataset.py │ │ ├── unpaired_preference_dataset.py │ │ └── utils.py │ ├── models │ │ ├── __init__.py │ │ ├── actor.py │ │ ├── loss.py │ │ ├── model.py │ │ ├── ring_attn_utils.py │ │ └── utils.py │ ├── trainer │ │ ├── __init__.py │ │ ├── dpo_trainer.py │ │ ├── kd_trainer.py │ │ ├── kto_trainer.py │ │ ├── ppo_trainer.py │ │ ├── ppo_utils │ │ │ ├── __init__.py │ │ │ ├── experience_maker.py │ │ │ ├── kl_controller.py │ │ │ └── replay_buffer.py │ │ ├── prm_trainer.py │ │ ├── ray │ │ │ ├── __init__.py │ │ │ ├── launcher.py │ │ │ ├── ppo_actor.py │ │ │ ├── ppo_critic.py │ │ │ ├── utils.py │ │ │ ├── vllm_engine.py │ │ │ └── vllm_worker_wrap.py │ │ ├── rft_trainer.py │ │ ├── rm_trainer.py │ │ └── sft_trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── deepspeed │ │ ├── __init__.py │ │ ├── deepspeed.py │ │ └── deepspeed_utils.py │ │ ├── distributed_sampler.py │ │ ├── distributed_util.py │ │ ├── logging_utils.py │ │ ├── processor.py │ │ ├── remote_rm_utils.py │ │ └── utils.py ├── pre_process.ipynb ├── run_ppo.sh ├── serve_remote_rewards_assign.sh └── serve_rewards_assign.py └── SFT ├── example_data ├── chemical_calculation.jsonl ├── diffusion_rate_analysis.jsonl └── material_calculation.jsonl ├── sft.py └── start_SFT.sh /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/README.md -------------------------------------------------------------------------------- /assets/main_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/assets/main_fig.png -------------------------------------------------------------------------------- /report/OpenRFT.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/report/OpenRFT.pdf -------------------------------------------------------------------------------- /src/Evaluate/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/evaluate.py -------------------------------------------------------------------------------- /src/Evaluate/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/inference.py -------------------------------------------------------------------------------- /src/Evaluate/run_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/run_vllm.sh -------------------------------------------------------------------------------- /src/Evaluate/test_data/GB1_ftness_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/GB1_ftness_prediction.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/chemical_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/chemical_calculation.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/diffusion_rate_analysis.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/diffusion_rate_analysis.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/high_school_physics_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/high_school_physics_calculation.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/material_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/material_calculation.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/molecule_structure_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/molecule_structure_prediction.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/perovskite_stability_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/perovskite_stability_prediction.json -------------------------------------------------------------------------------- /src/Evaluate/test_data/retrosynthesis.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/Evaluate/test_data/retrosynthesis.json -------------------------------------------------------------------------------- /src/PPO/data/processed/GB1_ftness_prediction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/GB1_ftness_prediction.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/chemical_calculation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/chemical_calculation.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/diffusion_rate_analysis.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/diffusion_rate_analysis.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/high_school_physics_calculation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/high_school_physics_calculation.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/material_calculation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/material_calculation.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/molecule_structure_prediction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/molecule_structure_prediction.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/perovskite_stability_prediction.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/perovskite_stability_prediction.jsonl -------------------------------------------------------------------------------- /src/PPO/data/processed/retrosynthesis.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/processed/retrosynthesis.jsonl -------------------------------------------------------------------------------- /src/PPO/data/training_data/GB1_ftness_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/GB1_ftness_prediction.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/chemical_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/chemical_calculation.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/diffusion_rate_analysis.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/diffusion_rate_analysis.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/high_school_physics_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/high_school_physics_calculation.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/material_calculation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/material_calculation.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/molecule_structure_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/molecule_structure_prediction.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/perovskite_stability_prediction.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/perovskite_stability_prediction.json -------------------------------------------------------------------------------- /src/PPO/data/training_data/retrosynthesis.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/data/training_data/retrosynthesis.json -------------------------------------------------------------------------------- /src/PPO/openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_rft.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/actor.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/loss.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/model.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/models/utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/rft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/rft_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /src/PPO/openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /src/PPO/pre_process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/pre_process.ipynb -------------------------------------------------------------------------------- /src/PPO/run_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/run_ppo.sh -------------------------------------------------------------------------------- /src/PPO/serve_remote_rewards_assign.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/serve_remote_rewards_assign.sh -------------------------------------------------------------------------------- /src/PPO/serve_rewards_assign.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/PPO/serve_rewards_assign.py -------------------------------------------------------------------------------- /src/SFT/example_data/chemical_calculation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/SFT/example_data/chemical_calculation.jsonl -------------------------------------------------------------------------------- /src/SFT/example_data/diffusion_rate_analysis.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/SFT/example_data/diffusion_rate_analysis.jsonl -------------------------------------------------------------------------------- /src/SFT/example_data/material_calculation.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/SFT/example_data/material_calculation.jsonl -------------------------------------------------------------------------------- /src/SFT/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/SFT/sft.py -------------------------------------------------------------------------------- /src/SFT/start_SFT.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ADaM-BJTU/OpenRFT/HEAD/src/SFT/start_SFT.sh --------------------------------------------------------------------------------