├── .gitignore ├── 2505.12346v1.pdf ├── 7df466b1f3a8b6dd1d417827be505667a42145f724465f7413b3785fbd9d7ac2Qwen-Qwen2.5-Math-7B.lock ├── LICENSE ├── LICENSE.txt ├── Makefile ├── README.md ├── analysis ├── answering_ability.py ├── keyword_count.py └── sr_detection.py ├── assets ├── benchmark.png ├── benchmark_table.png ├── deepseek-base-aha.png ├── drgrpo.png ├── llama-r1-zero.png ├── main-result.png ├── qwen-math-base-scores.png ├── seed-frame.png └── template-data-duet.png ├── conda.yml ├── datasets ├── evaluation_suite │ ├── aime │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── amc │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── dataset_dict.json │ ├── math │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── minerva │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ └── olympiad_bench │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json └── train │ ├── asdiv_2k │ ├── dataset_dict.json │ └── train │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── gsm_8k │ ├── dataset_dict.json │ └── train │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── math_12k │ ├── dataset_dict.json │ └── train │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ ├── math_lvl3to5_8k │ ├── dataset_dict.json │ ├── eval │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ └── train │ │ ├── data-00000-of-00001.arrow │ │ ├── dataset_info.json │ │ └── state.json │ └── orz_57k │ ├── dataset_dict.json │ └── train │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json ├── deploy_dpsk └── serving.sh ├── eval.sh ├── evaluate_model.py ├── examples ├── llama3.2-3b-numinaqa.sh ├── qwen2.5-math-1.5b.sh └── qwen2.5-math-7b.sh ├── oat ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── benchmark │ ├── README.md │ ├── llama3_oat.sh │ ├── postprocess.py │ ├── pythia_custom_remote_rm.py │ ├── pythia_oat.sh │ └── pythia_trl.sh ├── docs │ ├── alignment_as_cdb.md │ ├── new_logo.png │ ├── ppo_temperature.png │ ├── preference_learning_examples.md │ ├── r1_zero_countdown.png │ └── reasoning_examples.md ├── examples │ ├── 2gpu_online_simpo_pythia_1b_tldr_pairrm.sh │ ├── 2gpu_online_simpo_pythia_1b_tldr_skyworkrm.sh │ ├── 8gpu_online_dpo_pythia_1b_tldr_gpt_judge.sh │ ├── 8gpu_online_dpo_pythia_1b_tldr_skyworkrm.sh │ ├── 8gpu_sea_dpo_pythia_1b_tldr_skyworkrm.sh │ ├── 8gpu_sft_math.sh │ ├── offline_dpo_llama3_8b_ultrafeedback.sh │ └── r1_zero_math.py ├── k8s │ ├── README.md │ ├── readiness.sh │ ├── rm-service-armo.yaml │ ├── rm-service.yaml │ ├── serving-armo.yaml │ ├── serving.yaml │ └── values.yaml ├── oat │ ├── __about__.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── args.cpython-310.pyc │ │ ├── args.cpython-311.pyc │ │ ├── cmh_types.cpython-310.pyc │ │ ├── exploration.cpython-310.pyc │ │ ├── interface.cpython-310.pyc │ │ ├── model.cpython-310.pyc │ │ ├── types.cpython-310.pyc │ │ └── types.cpython-311.pyc │ ├── actors │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── base.cpython-311.pyc │ │ │ ├── preference.cpython-310.pyc │ │ │ ├── preference.cpython-311.pyc │ │ │ └── reward.cpython-310.pyc │ │ ├── base.py │ │ ├── preference.py │ │ └── reward.py │ ├── algorithms │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── cmh_ppo.cpython-310.pyc │ │ │ └── ppo.cpython-310.pyc │ │ ├── apl.py │ │ ├── cmh_ppo.py │ │ ├── ppo.py │ │ ├── rft.py │ │ └── xpo.py │ ├── args.py │ ├── cmh_types.py │ ├── collectors │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── asynchronous.cpython-310.pyc │ │ │ └── base.cpython-310.pyc │ │ ├── asynchronous.py │ │ └── base.py │ ├── experiment │ │ ├── __init__.py │ │ ├── main.py │ │ ├── run_apl.py │ │ ├── run_offline.py │ │ ├── run_offline_lp.py │ │ ├── run_offline_ppo.py │ │ ├── run_ppo.py │ │ └── run_xpo.py │ ├── exploration.py │ ├── interface.py │ ├── learners │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── dap.cpython-310.pyc │ │ │ ├── dap_with_rm.cpython-310.pyc │ │ │ ├── loss.cpython-310.pyc │ │ │ ├── ntp.cpython-310.pyc │ │ │ ├── offline.cpython-310.pyc │ │ │ ├── offline_dap.cpython-310.pyc │ │ │ ├── rl.cpython-310.pyc │ │ │ └── sft.cpython-310.pyc │ │ ├── base.py │ │ ├── dap.py │ │ ├── dap_with_rm.py │ │ ├── loss.py │ │ ├── ntp.py │ │ ├── offline.py │ │ ├── offline_dap.py │ │ ├── rl.py │ │ └── sft.py │ ├── model.py │ ├── multistep.py │ ├── oracles │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── base.cpython-310.pyc │ │ │ ├── base.cpython-311.pyc │ │ │ ├── gpt.cpython-310.pyc │ │ │ ├── gpt.cpython-311.pyc │ │ │ ├── gsm8k.cpython-310.pyc │ │ │ ├── gsm8k.cpython-311.pyc │ │ │ ├── pair.cpython-310.pyc │ │ │ └── pair.cpython-311.pyc │ │ ├── base.py │ │ ├── countdown.py │ │ ├── gpt.py │ │ ├── gsm8k.py │ │ ├── pair.py │ │ └── remote │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── client.cpython-310.pyc │ │ │ └── client.cpython-311.pyc │ │ │ ├── client.py │ │ │ └── server.py │ ├── prompts.py │ ├── rm │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── __init__.cpython-311.pyc │ │ │ ├── backbone.cpython-310.pyc │ │ │ ├── model.cpython-310.pyc │ │ │ ├── model.cpython-311.pyc │ │ │ ├── networks.cpython-310.pyc │ │ │ ├── networks.cpython-311.pyc │ │ │ ├── uncertainty.cpython-310.pyc │ │ │ └── uncertainty.cpython-311.pyc │ │ ├── backbone.py │ │ ├── model.py │ │ ├── networks.py │ │ └── uncertainty.py │ ├── types.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-311.pyc │ │ ├── buffer.cpython-310.pyc │ │ ├── buffer.cpython-311.pyc │ │ ├── data.cpython-310.pyc │ │ ├── deepspeed.cpython-310.pyc │ │ ├── distributed.cpython-310.pyc │ │ ├── distributed.cpython-311.pyc │ │ ├── ipc.cpython-310.pyc │ │ ├── ipc.cpython-311.pyc │ │ ├── launcher.cpython-310.pyc │ │ └── ops.cpython-310.pyc │ │ ├── buffer.py │ │ ├── data.py │ │ ├── deepspeed.py │ │ ├── distributed.py │ │ ├── ipc.py │ │ ├── launcher.py │ │ ├── ops.py │ │ └── slicer.py ├── pyproject.toml ├── scripts │ └── inference.py ├── setup.py └── test │ └── test_rm_server.py ├── pyproject.toml ├── requirements.txt ├── run_debug.sh ├── seed.txt ├── seed_grpo.py ├── setup.sh └── understand_r1_zero ├── __about__.py ├── __init__.py └── math_grader.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/.gitignore -------------------------------------------------------------------------------- /2505.12346v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/2505.12346v1.pdf -------------------------------------------------------------------------------- /7df466b1f3a8b6dd1d417827be505667a42145f724465f7413b3785fbd9d7ac2Qwen-Qwen2.5-Math-7B.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/LICENSE -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/README.md -------------------------------------------------------------------------------- /analysis/answering_ability.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/analysis/answering_ability.py -------------------------------------------------------------------------------- /analysis/keyword_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/analysis/keyword_count.py -------------------------------------------------------------------------------- /analysis/sr_detection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/analysis/sr_detection.py -------------------------------------------------------------------------------- /assets/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/benchmark.png -------------------------------------------------------------------------------- /assets/benchmark_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/benchmark_table.png -------------------------------------------------------------------------------- /assets/deepseek-base-aha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/deepseek-base-aha.png -------------------------------------------------------------------------------- /assets/drgrpo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/drgrpo.png -------------------------------------------------------------------------------- /assets/llama-r1-zero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/llama-r1-zero.png -------------------------------------------------------------------------------- /assets/main-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/main-result.png -------------------------------------------------------------------------------- /assets/qwen-math-base-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/qwen-math-base-scores.png -------------------------------------------------------------------------------- /assets/seed-frame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/seed-frame.png -------------------------------------------------------------------------------- /assets/template-data-duet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/assets/template-data-duet.png -------------------------------------------------------------------------------- /conda.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/conda.yml -------------------------------------------------------------------------------- /datasets/evaluation_suite/aime/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/aime/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/evaluation_suite/aime/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/aime/dataset_info.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/aime/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/aime/state.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/amc/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/amc/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/evaluation_suite/amc/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/amc/dataset_info.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/amc/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/amc/state.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/dataset_dict.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/dataset_dict.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/math/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/math/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/evaluation_suite/math/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/math/dataset_info.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/math/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/math/state.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/minerva/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/minerva/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/evaluation_suite/minerva/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/minerva/dataset_info.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/minerva/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/minerva/state.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/olympiad_bench/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/olympiad_bench/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/evaluation_suite/olympiad_bench/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/olympiad_bench/dataset_info.json -------------------------------------------------------------------------------- /datasets/evaluation_suite/olympiad_bench/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/evaluation_suite/olympiad_bench/state.json -------------------------------------------------------------------------------- /datasets/train/asdiv_2k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train"]} -------------------------------------------------------------------------------- /datasets/train/asdiv_2k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/asdiv_2k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/asdiv_2k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/asdiv_2k/train/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/asdiv_2k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/asdiv_2k/train/state.json -------------------------------------------------------------------------------- /datasets/train/gsm_8k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train"]} -------------------------------------------------------------------------------- /datasets/train/gsm_8k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/gsm_8k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/gsm_8k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/gsm_8k/train/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/gsm_8k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/gsm_8k/train/state.json -------------------------------------------------------------------------------- /datasets/train/math_12k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train"]} -------------------------------------------------------------------------------- /datasets/train/math_12k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_12k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/math_12k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_12k/train/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/math_12k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_12k/train/state.json -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train", "eval"]} -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/eval/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/eval/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/eval/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/eval/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/eval/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/eval/state.json -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/train/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/math_lvl3to5_8k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/math_lvl3to5_8k/train/state.json -------------------------------------------------------------------------------- /datasets/train/orz_57k/dataset_dict.json: -------------------------------------------------------------------------------- 1 | {"splits": ["train"]} -------------------------------------------------------------------------------- /datasets/train/orz_57k/train/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/orz_57k/train/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /datasets/train/orz_57k/train/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/orz_57k/train/dataset_info.json -------------------------------------------------------------------------------- /datasets/train/orz_57k/train/state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/datasets/train/orz_57k/train/state.json -------------------------------------------------------------------------------- /deploy_dpsk/serving.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/deploy_dpsk/serving.sh -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/eval.sh -------------------------------------------------------------------------------- /evaluate_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/evaluate_model.py -------------------------------------------------------------------------------- /examples/llama3.2-3b-numinaqa.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/examples/llama3.2-3b-numinaqa.sh -------------------------------------------------------------------------------- /examples/qwen2.5-math-1.5b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/examples/qwen2.5-math-1.5b.sh -------------------------------------------------------------------------------- /examples/qwen2.5-math-7b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/examples/qwen2.5-math-7b.sh -------------------------------------------------------------------------------- /oat/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/.gitignore -------------------------------------------------------------------------------- /oat/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/LICENSE -------------------------------------------------------------------------------- /oat/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/Makefile -------------------------------------------------------------------------------- /oat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/README.md -------------------------------------------------------------------------------- /oat/benchmark/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/README.md -------------------------------------------------------------------------------- /oat/benchmark/llama3_oat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/llama3_oat.sh -------------------------------------------------------------------------------- /oat/benchmark/postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/postprocess.py -------------------------------------------------------------------------------- /oat/benchmark/pythia_custom_remote_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/pythia_custom_remote_rm.py -------------------------------------------------------------------------------- /oat/benchmark/pythia_oat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/pythia_oat.sh -------------------------------------------------------------------------------- /oat/benchmark/pythia_trl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/benchmark/pythia_trl.sh -------------------------------------------------------------------------------- /oat/docs/alignment_as_cdb.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/alignment_as_cdb.md -------------------------------------------------------------------------------- /oat/docs/new_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/new_logo.png -------------------------------------------------------------------------------- /oat/docs/ppo_temperature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/ppo_temperature.png -------------------------------------------------------------------------------- /oat/docs/preference_learning_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/preference_learning_examples.md -------------------------------------------------------------------------------- /oat/docs/r1_zero_countdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/r1_zero_countdown.png -------------------------------------------------------------------------------- /oat/docs/reasoning_examples.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/docs/reasoning_examples.md -------------------------------------------------------------------------------- /oat/examples/2gpu_online_simpo_pythia_1b_tldr_pairrm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/2gpu_online_simpo_pythia_1b_tldr_pairrm.sh -------------------------------------------------------------------------------- /oat/examples/2gpu_online_simpo_pythia_1b_tldr_skyworkrm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/2gpu_online_simpo_pythia_1b_tldr_skyworkrm.sh -------------------------------------------------------------------------------- /oat/examples/8gpu_online_dpo_pythia_1b_tldr_gpt_judge.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/8gpu_online_dpo_pythia_1b_tldr_gpt_judge.sh -------------------------------------------------------------------------------- /oat/examples/8gpu_online_dpo_pythia_1b_tldr_skyworkrm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/8gpu_online_dpo_pythia_1b_tldr_skyworkrm.sh -------------------------------------------------------------------------------- /oat/examples/8gpu_sea_dpo_pythia_1b_tldr_skyworkrm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/8gpu_sea_dpo_pythia_1b_tldr_skyworkrm.sh -------------------------------------------------------------------------------- /oat/examples/8gpu_sft_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/8gpu_sft_math.sh -------------------------------------------------------------------------------- /oat/examples/offline_dpo_llama3_8b_ultrafeedback.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/offline_dpo_llama3_8b_ultrafeedback.sh -------------------------------------------------------------------------------- /oat/examples/r1_zero_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/examples/r1_zero_math.py -------------------------------------------------------------------------------- /oat/k8s/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/README.md -------------------------------------------------------------------------------- /oat/k8s/readiness.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/readiness.sh -------------------------------------------------------------------------------- /oat/k8s/rm-service-armo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/rm-service-armo.yaml -------------------------------------------------------------------------------- /oat/k8s/rm-service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/rm-service.yaml -------------------------------------------------------------------------------- /oat/k8s/serving-armo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/serving-armo.yaml -------------------------------------------------------------------------------- /oat/k8s/serving.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/serving.yaml -------------------------------------------------------------------------------- /oat/k8s/values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/k8s/values.yaml -------------------------------------------------------------------------------- /oat/oat/__about__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__about__.py -------------------------------------------------------------------------------- /oat/oat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__init__.py -------------------------------------------------------------------------------- /oat/oat/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/args.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/args.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/args.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/args.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/cmh_types.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/cmh_types.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/exploration.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/exploration.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/interface.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/interface.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/types.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/types.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/__pycache__/types.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/__pycache__/types.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__init__.py -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/preference.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/preference.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/preference.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/preference.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/actors/__pycache__/reward.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/__pycache__/reward.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/actors/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/base.py -------------------------------------------------------------------------------- /oat/oat/actors/preference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/preference.py -------------------------------------------------------------------------------- /oat/oat/actors/reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/actors/reward.py -------------------------------------------------------------------------------- /oat/oat/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/__init__.py -------------------------------------------------------------------------------- /oat/oat/algorithms/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/algorithms/__pycache__/cmh_ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/__pycache__/cmh_ppo.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/algorithms/__pycache__/ppo.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/__pycache__/ppo.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/algorithms/apl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/apl.py -------------------------------------------------------------------------------- /oat/oat/algorithms/cmh_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/cmh_ppo.py -------------------------------------------------------------------------------- /oat/oat/algorithms/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/ppo.py -------------------------------------------------------------------------------- /oat/oat/algorithms/rft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/rft.py -------------------------------------------------------------------------------- /oat/oat/algorithms/xpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/algorithms/xpo.py -------------------------------------------------------------------------------- /oat/oat/args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/args.py -------------------------------------------------------------------------------- /oat/oat/cmh_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/cmh_types.py -------------------------------------------------------------------------------- /oat/oat/collectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/__init__.py -------------------------------------------------------------------------------- /oat/oat/collectors/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/collectors/__pycache__/asynchronous.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/__pycache__/asynchronous.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/collectors/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/collectors/asynchronous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/asynchronous.py -------------------------------------------------------------------------------- /oat/oat/collectors/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/collectors/base.py -------------------------------------------------------------------------------- /oat/oat/experiment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/__init__.py -------------------------------------------------------------------------------- /oat/oat/experiment/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/main.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_apl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_apl.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_offline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_offline.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_offline_lp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_offline_lp.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_offline_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_offline_ppo.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_ppo.py -------------------------------------------------------------------------------- /oat/oat/experiment/run_xpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/experiment/run_xpo.py -------------------------------------------------------------------------------- /oat/oat/exploration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/exploration.py -------------------------------------------------------------------------------- /oat/oat/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/interface.py -------------------------------------------------------------------------------- /oat/oat/learners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__init__.py -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/dap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/dap.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/dap_with_rm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/dap_with_rm.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/loss.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/loss.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/ntp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/ntp.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/offline.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/offline.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/offline_dap.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/offline_dap.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/rl.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/rl.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/__pycache__/sft.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/__pycache__/sft.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/learners/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/base.py -------------------------------------------------------------------------------- /oat/oat/learners/dap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/dap.py -------------------------------------------------------------------------------- /oat/oat/learners/dap_with_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/dap_with_rm.py -------------------------------------------------------------------------------- /oat/oat/learners/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/loss.py -------------------------------------------------------------------------------- /oat/oat/learners/ntp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/ntp.py -------------------------------------------------------------------------------- /oat/oat/learners/offline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/offline.py -------------------------------------------------------------------------------- /oat/oat/learners/offline_dap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/offline_dap.py -------------------------------------------------------------------------------- /oat/oat/learners/rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/rl.py -------------------------------------------------------------------------------- /oat/oat/learners/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/learners/sft.py -------------------------------------------------------------------------------- /oat/oat/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/model.py -------------------------------------------------------------------------------- /oat/oat/multistep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/multistep.py -------------------------------------------------------------------------------- /oat/oat/oracles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__init__.py -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/base.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/base.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/base.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/base.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/gpt.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/gpt.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/gpt.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/gpt.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/gsm8k.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/gsm8k.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/gsm8k.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/gsm8k.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/pair.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/pair.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/__pycache__/pair.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/__pycache__/pair.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/base.py -------------------------------------------------------------------------------- /oat/oat/oracles/countdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/countdown.py -------------------------------------------------------------------------------- /oat/oat/oracles/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/gpt.py -------------------------------------------------------------------------------- /oat/oat/oracles/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/gsm8k.py -------------------------------------------------------------------------------- /oat/oat/oracles/pair.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/pair.py -------------------------------------------------------------------------------- /oat/oat/oracles/remote/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/__init__.py -------------------------------------------------------------------------------- /oat/oat/oracles/remote/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/remote/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/remote/__pycache__/client.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/__pycache__/client.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/remote/__pycache__/client.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/__pycache__/client.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/oracles/remote/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/client.py -------------------------------------------------------------------------------- /oat/oat/oracles/remote/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/oracles/remote/server.py -------------------------------------------------------------------------------- /oat/oat/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/prompts.py -------------------------------------------------------------------------------- /oat/oat/rm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__init__.py -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/backbone.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/backbone.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/model.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/model.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/networks.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/networks.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/networks.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/networks.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/uncertainty.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/uncertainty.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/rm/__pycache__/uncertainty.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/__pycache__/uncertainty.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/rm/backbone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/backbone.py -------------------------------------------------------------------------------- /oat/oat/rm/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/model.py -------------------------------------------------------------------------------- /oat/oat/rm/networks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/networks.py -------------------------------------------------------------------------------- /oat/oat/rm/uncertainty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/rm/uncertainty.py -------------------------------------------------------------------------------- /oat/oat/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/types.py -------------------------------------------------------------------------------- /oat/oat/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__init__.py -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/buffer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/buffer.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/buffer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/buffer.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/data.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/deepspeed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/deepspeed.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/distributed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/distributed.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/distributed.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/distributed.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/ipc.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/ipc.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/ipc.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/ipc.cpython-311.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/launcher.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/launcher.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/__pycache__/ops.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/__pycache__/ops.cpython-310.pyc -------------------------------------------------------------------------------- /oat/oat/utils/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/buffer.py -------------------------------------------------------------------------------- /oat/oat/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/data.py -------------------------------------------------------------------------------- /oat/oat/utils/deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/deepspeed.py -------------------------------------------------------------------------------- /oat/oat/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/distributed.py -------------------------------------------------------------------------------- /oat/oat/utils/ipc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/ipc.py -------------------------------------------------------------------------------- /oat/oat/utils/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/launcher.py -------------------------------------------------------------------------------- /oat/oat/utils/ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/ops.py -------------------------------------------------------------------------------- /oat/oat/utils/slicer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/oat/utils/slicer.py -------------------------------------------------------------------------------- /oat/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/pyproject.toml -------------------------------------------------------------------------------- /oat/scripts/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/scripts/inference.py -------------------------------------------------------------------------------- /oat/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/setup.py -------------------------------------------------------------------------------- /oat/test/test_rm_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/oat/test/test_rm_server.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/run_debug.sh -------------------------------------------------------------------------------- /seed.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/seed.txt -------------------------------------------------------------------------------- /seed_grpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/seed_grpo.py -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/setup.sh -------------------------------------------------------------------------------- /understand_r1_zero/__about__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/understand_r1_zero/__about__.py -------------------------------------------------------------------------------- /understand_r1_zero/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/understand_r1_zero/__init__.py -------------------------------------------------------------------------------- /understand_r1_zero/math_grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dreamer312/SEED-GRPO/HEAD/understand_r1_zero/math_grader.py --------------------------------------------------------------------------------