├── .DS_Store ├── README.md ├── configs ├── .DS_Store ├── deepspeed_stage1.json ├── deepspeed_stage2.json ├── deepspeed_stage3.json ├── training.yaml ├── zero0.yaml ├── zero2.yaml └── zero3.yaml ├── dpo_config.yaml ├── dpo_iteration ├── .DS_Store ├── dpo.py └── run_dpo.py ├── eval ├── evaluate.py └── grader.py ├── eval_math ├── LICENSE ├── README.md ├── data │ ├── aime24 │ │ └── test.jsonl │ ├── amc23 │ │ └── test.jsonl │ ├── aqua │ │ └── test.jsonl │ ├── asdiv │ │ └── test.jsonl │ ├── carp_en │ │ ├── demo.json │ │ └── test.jsonl │ ├── cmath │ │ └── test.jsonl │ ├── cn_middle_school │ │ └── test.jsonl │ ├── college_math │ │ └── test.jsonl │ ├── eval_rm_maj_example │ │ └── math_cot_100.jsonl │ ├── gaokao2023en │ │ └── test.jsonl │ ├── gaokao2024_I │ │ └── test.jsonl │ ├── gaokao2024_II │ │ └── test.jsonl │ ├── gaokao2024_mix │ │ └── test.jsonl │ ├── gaokao_math_cloze │ │ └── test.jsonl │ ├── gaokao_math_qa │ │ └── test.jsonl │ ├── gsm8k │ │ ├── test.jsonl │ │ └── train.jsonl │ ├── math │ │ ├── test.jsonl │ │ └── train.jsonl │ ├── math500 │ │ └── test.jsonl │ ├── mawps │ │ ├── addsub.jsonl │ │ ├── multiarith.jsonl │ │ ├── singleeq.jsonl │ │ ├── singleop.jsonl │ │ └── test.jsonl │ ├── minerva_math │ │ ├── README.md │ │ └── test.jsonl │ ├── mmlu_stem │ │ └── test.jsonl │ ├── olympiadbench │ │ ├── test.json │ │ └── test.jsonl │ ├── sat_math │ │ └── test.jsonl │ ├── svamp │ │ └── test.jsonl │ └── tabmwp │ │ └── test.jsonl ├── data_loader.py ├── evaluate.py ├── examples.py ├── grader.py ├── latex2sympy │ ├── .coveragerc │ ├── .gitignore │ ├── LICENSE.txt │ ├── PS.g4 │ ├── README.md │ ├── __init__.py │ ├── antlr-4.11.1-complete.jar │ ├── asciimath_printer.py │ ├── description.txt │ ├── dev-requirements.in │ ├── dev-requirements.txt │ ├── gen │ │ ├── PS.interp │ │ ├── PS.tokens │ │ ├── PSLexer.interp │ │ ├── PSLexer.py │ │ ├── PSLexer.tokens │ │ ├── PSListener.py │ │ ├── PSParser.py │ │ └── __init__.py │ ├── icon.png │ ├── latex2sympy2.py │ ├── requirements.in │ ├── requirements.txt │ ├── sandbox │ │ ├── linalg_equations.py │ │ ├── linalg_span.py │ │ ├── matrix.py │ │ ├── matrix_placeholders.py │ │ ├── sandbox.py │ │ ├── sandbox_equality.py │ │ ├── sectan.py │ │ └── vector.py │ ├── scripts │ │ ├── compile.sh │ │ ├── coverage-ci.sh │ │ ├── coverage.sh │ │ ├── pre-commit │ │ ├── pre-push │ │ ├── publish.sh │ │ ├── setup-hooks.sh │ │ ├── setup.sh │ │ └── test.sh │ ├── setup.cfg │ ├── setup.py │ └── tests │ │ ├── __init__.py │ │ ├── abs_test.py │ │ ├── all_bad_test.py │ │ ├── all_good_test.py │ │ ├── atom_expr_test.py │ │ ├── binomial_test.py │ │ ├── ceil_test.py │ │ ├── complex_test.py │ │ ├── context.py │ │ ├── exp_test.py │ │ ├── floor_test.py │ │ ├── gcd_test.py │ │ ├── greek_test.py │ │ ├── grouping_test.py │ │ ├── lcm_test.py │ │ ├── left_right_cdot_test.py │ │ ├── linalg_test.py │ │ ├── max_test.py │ │ ├── min_test.py │ │ ├── mod_test.py │ │ ├── overline_test.py │ │ ├── pi_test.py │ │ ├── trig_test.py │ │ └── variable_test.py ├── math_eval.py ├── math_utils.py ├── model_utils.py ├── parser.py ├── process.py ├── python_executor.py ├── requirements.txt ├── rm_maj_eval.py ├── sh │ ├── collect_results.py │ ├── convert_and_evaluate_gpu.sh │ ├── eval.sh │ ├── eval_single_node.sh │ └── run.sh ├── trajectory.py └── utils.py ├── figures └── dpo_overview.png ├── generation ├── .DS_Store ├── gen_hf.py ├── merge_data.py └── register_server.sh ├── ppo_training ├── README.md ├── numia_process.py └── verl_example.sh ├── reward_labeling.py ├── run_env_check.sh ├── run_iter_dpo.sh ├── trainning_data_annotation ├── .DS_Store └── annotate_data.py └── utils ├── .DS_Store ├── annotate_data.py ├── data_loader.py ├── filter_data.py ├── parser.py ├── python_executor.py └── utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/README.md -------------------------------------------------------------------------------- /configs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/.DS_Store -------------------------------------------------------------------------------- /configs/deepspeed_stage1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/deepspeed_stage1.json -------------------------------------------------------------------------------- /configs/deepspeed_stage2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/deepspeed_stage2.json -------------------------------------------------------------------------------- /configs/deepspeed_stage3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/deepspeed_stage3.json -------------------------------------------------------------------------------- /configs/training.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/training.yaml -------------------------------------------------------------------------------- /configs/zero0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/zero0.yaml -------------------------------------------------------------------------------- /configs/zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/zero2.yaml -------------------------------------------------------------------------------- /configs/zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/configs/zero3.yaml -------------------------------------------------------------------------------- /dpo_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/dpo_config.yaml -------------------------------------------------------------------------------- /dpo_iteration/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/dpo_iteration/.DS_Store -------------------------------------------------------------------------------- /dpo_iteration/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/dpo_iteration/dpo.py -------------------------------------------------------------------------------- /dpo_iteration/run_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/dpo_iteration/run_dpo.py -------------------------------------------------------------------------------- /eval/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval/evaluate.py -------------------------------------------------------------------------------- /eval/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval/grader.py -------------------------------------------------------------------------------- /eval_math/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/LICENSE -------------------------------------------------------------------------------- /eval_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/README.md -------------------------------------------------------------------------------- /eval_math/data/aime24/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/aime24/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/amc23/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/amc23/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/aqua/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/aqua/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/asdiv/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/asdiv/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/carp_en/demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/carp_en/demo.json -------------------------------------------------------------------------------- /eval_math/data/carp_en/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/carp_en/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/cmath/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/cmath/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/cn_middle_school/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/cn_middle_school/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/college_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/college_math/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/eval_rm_maj_example/math_cot_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/eval_rm_maj_example/math_cot_100.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao2023en/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao2023en/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao2024_I/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao2024_I/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao2024_II/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao2024_II/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao2024_mix/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao2024_mix/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao_math_cloze/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao_math_cloze/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gaokao_math_qa/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gaokao_math_qa/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gsm8k/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gsm8k/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/gsm8k/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/gsm8k/train.jsonl -------------------------------------------------------------------------------- /eval_math/data/math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/math/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/math/train.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/math/train.jsonl -------------------------------------------------------------------------------- /eval_math/data/math500/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/math500/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/mawps/addsub.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mawps/addsub.jsonl -------------------------------------------------------------------------------- /eval_math/data/mawps/multiarith.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mawps/multiarith.jsonl -------------------------------------------------------------------------------- /eval_math/data/mawps/singleeq.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mawps/singleeq.jsonl -------------------------------------------------------------------------------- /eval_math/data/mawps/singleop.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mawps/singleop.jsonl -------------------------------------------------------------------------------- /eval_math/data/mawps/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mawps/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/minerva_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/minerva_math/README.md -------------------------------------------------------------------------------- /eval_math/data/minerva_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/minerva_math/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/mmlu_stem/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/mmlu_stem/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/olympiadbench/test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/olympiadbench/test.json -------------------------------------------------------------------------------- /eval_math/data/olympiadbench/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/olympiadbench/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/sat_math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/sat_math/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/svamp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/svamp/test.jsonl -------------------------------------------------------------------------------- /eval_math/data/tabmwp/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data/tabmwp/test.jsonl -------------------------------------------------------------------------------- /eval_math/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/data_loader.py -------------------------------------------------------------------------------- /eval_math/evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/evaluate.py -------------------------------------------------------------------------------- /eval_math/examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/examples.py -------------------------------------------------------------------------------- /eval_math/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/grader.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/.coveragerc -------------------------------------------------------------------------------- /eval_math/latex2sympy/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/.gitignore -------------------------------------------------------------------------------- /eval_math/latex2sympy/LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/LICENSE.txt -------------------------------------------------------------------------------- /eval_math/latex2sympy/PS.g4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/PS.g4 -------------------------------------------------------------------------------- /eval_math/latex2sympy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/README.md -------------------------------------------------------------------------------- /eval_math/latex2sympy/__init__.py: -------------------------------------------------------------------------------- 1 | import latex2sympy -------------------------------------------------------------------------------- /eval_math/latex2sympy/antlr-4.11.1-complete.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/antlr-4.11.1-complete.jar -------------------------------------------------------------------------------- /eval_math/latex2sympy/asciimath_printer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/asciimath_printer.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/description.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/description.txt -------------------------------------------------------------------------------- /eval_math/latex2sympy/dev-requirements.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/dev-requirements.in -------------------------------------------------------------------------------- /eval_math/latex2sympy/dev-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/dev-requirements.txt -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PS.interp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PS.interp -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PS.tokens: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PS.tokens -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PSLexer.interp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PSLexer.interp -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PSLexer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PSLexer.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PSLexer.tokens: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PSLexer.tokens -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PSListener.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PSListener.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/PSParser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/gen/PSParser.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_math/latex2sympy/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/icon.png -------------------------------------------------------------------------------- /eval_math/latex2sympy/latex2sympy2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/latex2sympy2.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/requirements.in: -------------------------------------------------------------------------------- 1 | sympy 2 | antlr4-python3-runtime 3 | -------------------------------------------------------------------------------- /eval_math/latex2sympy/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/requirements.txt -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/linalg_equations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/linalg_equations.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/linalg_span.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/linalg_span.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/matrix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/matrix.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/matrix_placeholders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/matrix_placeholders.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/sandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/sandbox.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/sandbox_equality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/sandbox_equality.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/sectan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/sectan.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/sandbox/vector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/sandbox/vector.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/compile.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/compile.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/coverage-ci.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/coverage-ci.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/coverage.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/coverage.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/pre-commit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/pre-commit -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/pre-push: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/pre-push -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/publish.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/publish.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/setup-hooks.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/setup-hooks.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/setup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/setup.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/scripts/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/scripts/test.sh -------------------------------------------------------------------------------- /eval_math/latex2sympy/setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | max-line-length = 120 3 | ignore = E501 4 | -------------------------------------------------------------------------------- /eval_math/latex2sympy/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/setup.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/abs_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/abs_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/all_bad_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/all_bad_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/all_good_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/all_good_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/atom_expr_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/atom_expr_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/binomial_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/binomial_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/ceil_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/ceil_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/complex_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/complex_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/context.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/exp_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/exp_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/floor_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/floor_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/gcd_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/gcd_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/greek_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/greek_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/grouping_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/grouping_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/lcm_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/lcm_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/left_right_cdot_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/left_right_cdot_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/linalg_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/linalg_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/max_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/max_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/min_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/min_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/mod_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/mod_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/overline_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/overline_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/pi_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/pi_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/trig_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/trig_test.py -------------------------------------------------------------------------------- /eval_math/latex2sympy/tests/variable_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/latex2sympy/tests/variable_test.py -------------------------------------------------------------------------------- /eval_math/math_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/math_eval.py -------------------------------------------------------------------------------- /eval_math/math_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/math_utils.py -------------------------------------------------------------------------------- /eval_math/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/model_utils.py -------------------------------------------------------------------------------- /eval_math/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/parser.py -------------------------------------------------------------------------------- /eval_math/process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/process.py -------------------------------------------------------------------------------- /eval_math/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/python_executor.py -------------------------------------------------------------------------------- /eval_math/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/requirements.txt -------------------------------------------------------------------------------- /eval_math/rm_maj_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/rm_maj_eval.py -------------------------------------------------------------------------------- /eval_math/sh/collect_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/sh/collect_results.py -------------------------------------------------------------------------------- /eval_math/sh/convert_and_evaluate_gpu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/sh/convert_and_evaluate_gpu.sh -------------------------------------------------------------------------------- /eval_math/sh/eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/sh/eval.sh -------------------------------------------------------------------------------- /eval_math/sh/eval_single_node.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/sh/eval_single_node.sh -------------------------------------------------------------------------------- /eval_math/sh/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/sh/run.sh -------------------------------------------------------------------------------- /eval_math/trajectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/trajectory.py -------------------------------------------------------------------------------- /eval_math/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/eval_math/utils.py -------------------------------------------------------------------------------- /figures/dpo_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/figures/dpo_overview.png -------------------------------------------------------------------------------- /generation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/generation/.DS_Store -------------------------------------------------------------------------------- /generation/gen_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/generation/gen_hf.py -------------------------------------------------------------------------------- /generation/merge_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/generation/merge_data.py -------------------------------------------------------------------------------- /generation/register_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/generation/register_server.sh -------------------------------------------------------------------------------- /ppo_training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/ppo_training/README.md -------------------------------------------------------------------------------- /ppo_training/numia_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/ppo_training/numia_process.py -------------------------------------------------------------------------------- /ppo_training/verl_example.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/ppo_training/verl_example.sh -------------------------------------------------------------------------------- /reward_labeling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/reward_labeling.py -------------------------------------------------------------------------------- /run_env_check.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/run_env_check.sh -------------------------------------------------------------------------------- /run_iter_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/run_iter_dpo.sh -------------------------------------------------------------------------------- /trainning_data_annotation/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/trainning_data_annotation/.DS_Store -------------------------------------------------------------------------------- /trainning_data_annotation/annotate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/trainning_data_annotation/annotate_data.py -------------------------------------------------------------------------------- /utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/.DS_Store -------------------------------------------------------------------------------- /utils/annotate_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/annotate_data.py -------------------------------------------------------------------------------- /utils/data_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/data_loader.py -------------------------------------------------------------------------------- /utils/filter_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/filter_data.py -------------------------------------------------------------------------------- /utils/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/parser.py -------------------------------------------------------------------------------- /utils/python_executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/python_executor.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RLHFlow/Online-DPO-R1/HEAD/utils/utils.py --------------------------------------------------------------------------------