├── .gitignore ├── LICENSE ├── LICENSE-APACHE.txt ├── README.md ├── eval ├── README.md ├── __init__.py ├── meval │ ├── LICENSE │ ├── data │ │ ├── aime24 │ │ │ └── test.jsonl │ │ ├── amc23 │ │ │ └── test.jsonl │ │ ├── aqua │ │ │ └── test.jsonl │ │ ├── asdiv │ │ │ └── test.jsonl │ │ ├── carp_en │ │ │ ├── demo.json │ │ │ └── test.jsonl │ │ ├── cmath │ │ │ └── test.jsonl │ │ ├── cn_middle_school │ │ │ └── test.jsonl │ │ ├── college_math │ │ │ └── test.jsonl │ │ ├── eval_rm_maj_example │ │ │ └── math_cot_100.jsonl │ │ ├── gaokao2023en │ │ │ └── test.jsonl │ │ ├── gaokao2024_I │ │ │ └── test.jsonl │ │ ├── gaokao2024_II │ │ │ └── test.jsonl │ │ ├── gaokao2024_mix │ │ │ └── test.jsonl │ │ ├── gaokao_math_cloze │ │ │ └── test.jsonl │ │ ├── gaokao_math_qa │ │ │ └── test.jsonl │ │ ├── gsm8k │ │ │ ├── test.jsonl │ │ │ └── train.jsonl │ │ ├── math │ │ │ ├── test.jsonl │ │ │ └── train.jsonl │ │ ├── math500 │ │ │ └── test.jsonl │ │ ├── mawps │ │ │ ├── addsub.jsonl │ │ │ ├── multiarith.jsonl │ │ │ ├── singleeq.jsonl │ │ │ ├── singleop.jsonl │ │ │ └── test.jsonl │ │ ├── minerva_math │ │ │ ├── README.md │ │ │ └── test.jsonl │ │ ├── mmlu_stem │ │ │ └── test.jsonl │ │ ├── olympiadbench │ │ │ ├── test.json │ │ │ └── test.jsonl │ │ ├── olympiadbench_p1 │ │ │ ├── test.json │ │ │ └── test.jsonl │ │ ├── olympiadbench_p2 │ │ │ ├── test.json │ │ │ └── test.jsonl │ │ ├── sat_math │ │ │ └── test.jsonl │ │ ├── svamp │ │ │ └── test.jsonl │ │ └── tabmwp │ │ │ └── test.jsonl │ ├── data_loader.py │ ├── evaluate.py │ ├── examples.py │ ├── grader.py │ ├── math_eval.py │ ├── math_utils.py │ ├── model_utils.py │ ├── parser.py │ ├── process.py │ ├── python_executor.py │ ├── rm_maj_eval.py │ ├── sh │ │ ├── collect_results.py │ │ ├── convert_and_evaluate_gpu.sh │ │ ├── eval.sh │ │ ├── eval_single_node.sh │ │ └── run.sh │ ├── trajectory.py │ └── utils.py ├── requirements.txt └── setup.py ├── fig ├── attempt_all.webp ├── attempt_avg.webp └── learning_curve.webp ├── script ├── zero_to_fp32.py └── zero_to_hf.py └── train ├── README.md ├── README_zh.md ├── data ├── math_level3to5_data.json └── math_level3to5_data_processed_with_qwen_prompt.json ├── dockerfile ├── Dockerfile └── docker-entrypoint.sh ├── docs ├── logo.png ├── ppo_examples.md └── ray_architecture.png ├── examples └── script │ ├── baseline.sh │ ├── baseline_debug.sh │ ├── multi_attempt.sh │ └── multi_attempt_debug.sh ├── openrlhf ├── __init__.py ├── cli │ ├── .ipynb_checkpoints │ │ └── train_ppo_ray-checkpoint.py │ ├── __init__.py │ ├── batch_inference.py │ ├── interactive_chat.py │ ├── serve_rm.py │ ├── train_dpo.py │ ├── train_kd.py │ ├── train_kto.py │ ├── train_ppo.py │ ├── train_ppo_ray.py │ ├── train_prm.py │ ├── train_rm.py │ └── train_sft.py ├── datasets │ ├── __init__.py │ ├── process_reward_dataset.py │ ├── prompts_dataset.py │ ├── reward_dataset.py │ ├── sft_dataset.py │ ├── unpaired_preference_dataset.py │ └── utils.py ├── duality │ ├── __init__.py │ ├── utils.py │ └── vllm_mul.py ├── models │ ├── __init__.py │ ├── actor.py │ ├── loss.py │ ├── model.py │ ├── ring_attn_utils.py │ └── utils.py ├── trainer │ ├── __init__.py │ ├── dpo_trainer.py │ ├── kd_trainer.py │ ├── kto_trainer.py │ ├── ppo_trainer.py │ ├── ppo_utils │ │ ├── __init__.py │ │ ├── experience_maker.py │ │ ├── kl_controller.py │ │ ├── qwen_math_eval_toolkit │ │ │ ├── examples.py │ │ │ ├── grader.py │ │ │ ├── parser.py │ │ │ └── utils.py │ │ └── replay_buffer.py │ ├── prm_trainer.py │ ├── ray │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── ppo_actor.py │ │ ├── ppo_critic.py │ │ ├── ppo_ref.py │ │ ├── vllm_engine.py │ │ └── vllm_worker_wrap.py │ ├── rm_trainer.py │ └── sft_trainer.py └── utils │ ├── __init__.py │ ├── deepspeed.py │ ├── deepspeed_utils.py │ ├── distributed_sampler.py │ ├── distributed_util.py │ ├── logging_utils.py │ ├── processor.py │ ├── remote_rm_utils.py │ └── utils.py ├── pyproject.toml ├── recipes ├── deepspeed_zero2.yaml ├── deepspeed_zero2_no_offload_nodes.yaml ├── deepspeed_zero3.yaml └── deepspeed_zero3_no_offload_nodes.yaml ├── requirements.txt ├── setup.py └── version.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSE-APACHE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/LICENSE-APACHE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/README.md -------------------------------------------------------------------------------- /eval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/README.md -------------------------------------------------------------------------------- /eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/meval/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/LICENSE -------------------------------------------------------------------------------- /eval/meval/data/aime24/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/aime24/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/amc23/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/amc23/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/aqua/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/aqua/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/asdiv/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/asdiv/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/carp_en/demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/carp_en/demo.json -------------------------------------------------------------------------------- /eval/meval/data/carp_en/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/carp_en/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/cmath/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/cmath/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/cn_middle_school/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/cn_middle_school/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/college_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/college_math/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/eval_rm_maj_example/math_cot_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/eval_rm_maj_example/math_cot_100.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao2023en/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao2023en/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao2024_I/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao2024_I/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao2024_II/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao2024_II/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao2024_mix/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao2024_mix/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao_math_cloze/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao_math_cloze/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gaokao_math_qa/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gaokao_math_qa/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gsm8k/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gsm8k/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/gsm8k/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/gsm8k/train.jsonl -------------------------------------------------------------------------------- /eval/meval/data/math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/math/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/math/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/math/train.jsonl -------------------------------------------------------------------------------- /eval/meval/data/math500/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/math500/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mawps/addsub.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mawps/addsub.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mawps/multiarith.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mawps/multiarith.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mawps/singleeq.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mawps/singleeq.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mawps/singleop.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mawps/singleop.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mawps/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mawps/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/minerva_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/minerva_math/README.md -------------------------------------------------------------------------------- /eval/meval/data/minerva_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/minerva_math/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/mmlu_stem/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/mmlu_stem/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench/test.json -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench_p1/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench_p1/test.json -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench_p1/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench_p1/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench_p2/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench_p2/test.json -------------------------------------------------------------------------------- /eval/meval/data/olympiadbench_p2/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/olympiadbench_p2/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/sat_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/sat_math/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/svamp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/svamp/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data/tabmwp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data/tabmwp/test.jsonl -------------------------------------------------------------------------------- /eval/meval/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/data_loader.py -------------------------------------------------------------------------------- /eval/meval/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/evaluate.py -------------------------------------------------------------------------------- /eval/meval/examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/examples.py -------------------------------------------------------------------------------- /eval/meval/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/grader.py -------------------------------------------------------------------------------- /eval/meval/math_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/math_eval.py -------------------------------------------------------------------------------- /eval/meval/math_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/math_utils.py -------------------------------------------------------------------------------- /eval/meval/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/model_utils.py -------------------------------------------------------------------------------- /eval/meval/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/parser.py -------------------------------------------------------------------------------- /eval/meval/process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/process.py -------------------------------------------------------------------------------- /eval/meval/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/python_executor.py -------------------------------------------------------------------------------- /eval/meval/rm_maj_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/rm_maj_eval.py -------------------------------------------------------------------------------- /eval/meval/sh/collect_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/sh/collect_results.py -------------------------------------------------------------------------------- /eval/meval/sh/convert_and_evaluate_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/sh/convert_and_evaluate_gpu.sh -------------------------------------------------------------------------------- /eval/meval/sh/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/sh/eval.sh -------------------------------------------------------------------------------- /eval/meval/sh/eval_single_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/sh/eval_single_node.sh -------------------------------------------------------------------------------- /eval/meval/sh/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/sh/run.sh -------------------------------------------------------------------------------- /eval/meval/trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/trajectory.py -------------------------------------------------------------------------------- /eval/meval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/meval/utils.py -------------------------------------------------------------------------------- /eval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/requirements.txt -------------------------------------------------------------------------------- /eval/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/eval/setup.py -------------------------------------------------------------------------------- /fig/attempt_all.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/fig/attempt_all.webp -------------------------------------------------------------------------------- /fig/attempt_avg.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/fig/attempt_avg.webp -------------------------------------------------------------------------------- /fig/learning_curve.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/fig/learning_curve.webp -------------------------------------------------------------------------------- /script/zero_to_fp32.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/script/zero_to_fp32.py -------------------------------------------------------------------------------- /script/zero_to_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/script/zero_to_hf.py -------------------------------------------------------------------------------- /train/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/README.md -------------------------------------------------------------------------------- /train/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/README_zh.md -------------------------------------------------------------------------------- /train/data/math_level3to5_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/data/math_level3to5_data.json -------------------------------------------------------------------------------- /train/data/math_level3to5_data_processed_with_qwen_prompt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/data/math_level3to5_data_processed_with_qwen_prompt.json -------------------------------------------------------------------------------- /train/dockerfile/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/dockerfile/Dockerfile -------------------------------------------------------------------------------- /train/dockerfile/docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/dockerfile/docker-entrypoint.sh -------------------------------------------------------------------------------- /train/docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/docs/logo.png -------------------------------------------------------------------------------- /train/docs/ppo_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/docs/ppo_examples.md -------------------------------------------------------------------------------- /train/docs/ray_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/docs/ray_architecture.png -------------------------------------------------------------------------------- /train/examples/script/baseline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/examples/script/baseline.sh -------------------------------------------------------------------------------- /train/examples/script/baseline_debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/examples/script/baseline_debug.sh -------------------------------------------------------------------------------- /train/examples/script/multi_attempt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/examples/script/multi_attempt.sh -------------------------------------------------------------------------------- /train/examples/script/multi_attempt_debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/examples/script/multi_attempt_debug.sh -------------------------------------------------------------------------------- /train/openrlhf/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.4" -------------------------------------------------------------------------------- /train/openrlhf/cli/.ipynb_checkpoints/train_ppo_ray-checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/.ipynb_checkpoints/train_ppo_ray-checkpoint.py -------------------------------------------------------------------------------- /train/openrlhf/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/openrlhf/cli/batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/batch_inference.py -------------------------------------------------------------------------------- /train/openrlhf/cli/interactive_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/interactive_chat.py -------------------------------------------------------------------------------- /train/openrlhf/cli/serve_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/serve_rm.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_dpo.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_kd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_kd.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_kto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_kto.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_ppo.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_ppo_ray.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_ppo_ray.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_prm.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_rm.py -------------------------------------------------------------------------------- /train/openrlhf/cli/train_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/cli/train_sft.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/process_reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/process_reward_dataset.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/prompts_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/prompts_dataset.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/reward_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/reward_dataset.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/sft_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/sft_dataset.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/unpaired_preference_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/unpaired_preference_dataset.py -------------------------------------------------------------------------------- /train/openrlhf/datasets/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/datasets/utils.py -------------------------------------------------------------------------------- /train/openrlhf/duality/__init__.py: -------------------------------------------------------------------------------- 1 | from .vllm_mul import MulLLM -------------------------------------------------------------------------------- /train/openrlhf/duality/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/duality/utils.py -------------------------------------------------------------------------------- /train/openrlhf/duality/vllm_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/duality/vllm_mul.py -------------------------------------------------------------------------------- /train/openrlhf/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/models/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/actor.py -------------------------------------------------------------------------------- /train/openrlhf/models/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/loss.py -------------------------------------------------------------------------------- /train/openrlhf/models/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/model.py -------------------------------------------------------------------------------- /train/openrlhf/models/ring_attn_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/ring_attn_utils.py -------------------------------------------------------------------------------- /train/openrlhf/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/models/utils.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/dpo_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/kd_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/kd_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/kto_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/kto_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/experience_maker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/experience_maker.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/kl_controller.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/examples.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/grader.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/parser.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/qwen_math_eval_toolkit/utils.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ppo_utils/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ppo_utils/replay_buffer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/prm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/prm_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/launcher.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/ppo_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/ppo_actor.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/ppo_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/ppo_critic.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/ppo_ref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/ppo_ref.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/vllm_engine.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/ray/vllm_worker_wrap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/ray/vllm_worker_wrap.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/rm_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/rm_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/trainer/sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/trainer/sft_trainer.py -------------------------------------------------------------------------------- /train/openrlhf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/__init__.py -------------------------------------------------------------------------------- /train/openrlhf/utils/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/deepspeed.py -------------------------------------------------------------------------------- /train/openrlhf/utils/deepspeed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/deepspeed_utils.py -------------------------------------------------------------------------------- /train/openrlhf/utils/distributed_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/distributed_sampler.py -------------------------------------------------------------------------------- /train/openrlhf/utils/distributed_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/distributed_util.py -------------------------------------------------------------------------------- /train/openrlhf/utils/logging_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/logging_utils.py -------------------------------------------------------------------------------- /train/openrlhf/utils/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/processor.py -------------------------------------------------------------------------------- /train/openrlhf/utils/remote_rm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/remote_rm_utils.py -------------------------------------------------------------------------------- /train/openrlhf/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/openrlhf/utils/utils.py -------------------------------------------------------------------------------- /train/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/pyproject.toml -------------------------------------------------------------------------------- /train/recipes/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/recipes/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /train/recipes/deepspeed_zero2_no_offload_nodes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/recipes/deepspeed_zero2_no_offload_nodes.yaml -------------------------------------------------------------------------------- /train/recipes/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/recipes/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /train/recipes/deepspeed_zero3_no_offload_nodes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/recipes/deepspeed_zero3_no_offload_nodes.yaml -------------------------------------------------------------------------------- /train/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/requirements.txt -------------------------------------------------------------------------------- /train/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DualityRL/multi-attempt/HEAD/train/setup.py -------------------------------------------------------------------------------- /train/version.txt: -------------------------------------------------------------------------------- 1 | 0.5.0 --------------------------------------------------------------------------------