├── assets └── main.png ├── data ├── evaluation │ └── test.json ├── prompts │ └── math.8k.json ├── scores.json └── train │ ├── limr │ ├── dataset_dict.json │ ├── test │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ └── train │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ └── math.8k │ ├── dataset_dict.json │ ├── test │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json │ └── train │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json ├── eval ├── config.json ├── data │ ├── aime24.jsonl │ ├── amc23.jsonl │ └── math500.jsonl ├── eval.py └── utils │ └── equal.py ├── lim ├── lim.sh └── lim_selection.py ├── limr.pdf ├── openrlhf ├── __init__.py ├── cli │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── serve_rm.cpython-310.pyc │ ├── batch_inference.py │ ├── interactive_chat.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── process_reward_dataset.cpython-310.pyc │ │ ├── prompts_dataset.cpython-310.pyc │ │ ├── reward_dataset.cpython-310.pyc │ │ ├── sft_dataset.cpython-310.pyc │ │ ├── unpaired_preference_dataset.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── actor.cpython-310.pyc │ │ ├── loss.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── ring_attn_utils.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── utils.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── logging_utils.cpython-310.pyc │ ├── processor.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── check │ ├── __pycache__ │ │ └── qwen_equal.cpython-310.pyc │ ├── evaluation.py │ ├── math_normalization.py │ └── qwen_equal.py │ ├── deepspeed │ ├── __init__.py │ ├── deepspeed.py │ └── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── readme.md └── scripts ├── train_limr.sh └── train_math.8k.sh /assets/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/assets/main.png -------------------------------------------------------------------------------- /data/evaluation/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/evaluation/test.json -------------------------------------------------------------------------------- /data/prompts/math.8k.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/prompts/math.8k.json -------------------------------------------------------------------------------- /data/scores.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/scores.json -------------------------------------------------------------------------------- /data/train/limr/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train", "test"]} -------------------------------------------------------------------------------- /data/train/limr/test/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/test/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /data/train/limr/test/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/test/dataset_info.json -------------------------------------------------------------------------------- /data/train/limr/test/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/test/state.json -------------------------------------------------------------------------------- /data/train/limr/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /data/train/limr/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/train/dataset_info.json -------------------------------------------------------------------------------- /data/train/limr/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/limr/train/state.json -------------------------------------------------------------------------------- /data/train/math.8k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train", "test"]} -------------------------------------------------------------------------------- /data/train/math.8k/test/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/test/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /data/train/math.8k/test/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/test/dataset_info.json -------------------------------------------------------------------------------- /data/train/math.8k/test/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/test/state.json -------------------------------------------------------------------------------- /data/train/math.8k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /data/train/math.8k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/train/dataset_info.json -------------------------------------------------------------------------------- /data/train/math.8k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/data/train/math.8k/train/state.json -------------------------------------------------------------------------------- /eval/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/config.json -------------------------------------------------------------------------------- /eval/data/aime24.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/data/aime24.jsonl -------------------------------------------------------------------------------- /eval/data/amc23.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/data/amc23.jsonl -------------------------------------------------------------------------------- /eval/data/math500.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/data/math500.jsonl -------------------------------------------------------------------------------- /eval/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/eval.py -------------------------------------------------------------------------------- /eval/utils/equal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/eval/utils/equal.py -------------------------------------------------------------------------------- /lim/lim.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/lim/lim.sh -------------------------------------------------------------------------------- /lim/lim_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/lim/lim_selection.py -------------------------------------------------------------------------------- /limr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/limr.pdf -------------------------------------------------------------------------------- /openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/__pycache__/serve_rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/__pycache__/serve_rm.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/process_reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/prompts_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/reward_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/sft_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/unpaired_preference_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/actor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/actor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/ring_attn_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/actor.py -------------------------------------------------------------------------------- /openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/loss.py -------------------------------------------------------------------------------- /openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/model.py -------------------------------------------------------------------------------- /openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/models/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/utils.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/__pycache__/logging_utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/processor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/__pycache__/processor.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/check/__pycache__/qwen_equal.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/check/__pycache__/qwen_equal.cpython-310.pyc -------------------------------------------------------------------------------- /openrlhf/utils/check/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/check/evaluation.py -------------------------------------------------------------------------------- /openrlhf/utils/check/math_normalization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/check/math_normalization.py -------------------------------------------------------------------------------- /openrlhf/utils/check/qwen_equal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/check/qwen_equal.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/deepspeed/__init__.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/deepspeed/deepspeed.py -------------------------------------------------------------------------------- /openrlhf/utils/deepspeed/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/deepspeed/deepspeed_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/readme.md -------------------------------------------------------------------------------- /scripts/train_limr.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/scripts/train_limr.sh -------------------------------------------------------------------------------- /scripts/train_math.8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GAIR-NLP/LIMR/HEAD/scripts/train_math.8k.sh --------------------------------------------------------------------------------