├── README.md ├── data_prepare.py ├── dataset.py ├── dpo_train.py ├── eval ├── Coding │ ├── human_eval │ │ ├── data │ │ │ └── HumanEval.jsonl.gz │ │ └── evaluate_human_eval.py │ └── mbpp │ │ ├── evaluate_mbpp.py │ │ ├── mbpp_completion.json │ │ ├── new_mbpp.json │ │ └── result.txt ├── Math │ ├── math │ │ ├── evaluate_math_cot.py │ │ ├── evaluate_math_pot.py │ │ ├── math_test_cleaned.json │ │ ├── results.json │ │ └── results.txt │ └── subset │ │ ├── asdiv-eurus-res │ │ ├── asdiv_results.json │ │ ├── asdiv_results.txt │ │ └── total_results.txt │ │ ├── asdiv-eurus-sft-res │ │ ├── SVAMP_results.json │ │ ├── SVAMP_results.txt │ │ └── total_results.txt │ │ ├── asdiv-res-sft │ │ ├── asdiv_results.json │ │ ├── asdiv_results.txt │ │ └── total_results.txt │ │ ├── asdiv-res │ │ ├── asdiv_results.json │ │ ├── asdiv_results.txt │ │ └── total_results.txt │ │ ├── data │ │ ├── SVAMP.json │ │ ├── asdiv.json │ │ └── gsmplus_test.json │ │ ├── evaluate_subset_cot.py │ │ ├── evaluate_subset_pot.py │ │ ├── svamp-eurus-res │ │ ├── SVAMP_results.json │ │ ├── SVAMP_results.txt │ │ └── total_results.txt │ │ ├── svamp-eurus-sft-res │ │ ├── SVAMP_results.json │ │ ├── SVAMP_results.txt │ │ ├── asdiv_results.json │ │ ├── asdiv_results.txt │ │ └── total_results.txt │ │ ├── svamp-res-sft │ │ ├── SVAMP_results.json │ │ ├── SVAMP_results.txt │ │ └── total_results.txt │ │ └── svamp-res │ │ ├── SVAMP_results.json │ │ ├── SVAMP_results.txt │ │ └── total_results.txt ├── requirements.txt ├── run.sh └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-311.pyc │ ├── data.cpython-310.pyc │ ├── data.cpython-311.pyc │ ├── evaluation.cpython-310.pyc │ ├── evaluation.cpython-311.pyc │ ├── execution.cpython-310.pyc │ ├── execution.cpython-311.pyc │ ├── grader.cpython-311.pyc │ ├── math_equivalence.cpython-311.pyc │ └── util.cpython-311.pyc │ ├── data.py │ ├── evaluation.py │ ├── execution.py │ ├── grader.py │ ├── math_equivalence.py │ ├── python_interpreter.py │ └── util.py ├── evaluate.py ├── figures └── framework.pdf ├── loss.py ├── requirements.txt └── tpo_train.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/README.md -------------------------------------------------------------------------------- /data_prepare.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/data_prepare.py -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/dataset.py -------------------------------------------------------------------------------- /dpo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/dpo_train.py -------------------------------------------------------------------------------- /eval/Coding/human_eval/data/HumanEval.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/human_eval/data/HumanEval.jsonl.gz -------------------------------------------------------------------------------- /eval/Coding/human_eval/evaluate_human_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/human_eval/evaluate_human_eval.py -------------------------------------------------------------------------------- /eval/Coding/mbpp/evaluate_mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/mbpp/evaluate_mbpp.py -------------------------------------------------------------------------------- /eval/Coding/mbpp/mbpp_completion.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/mbpp/mbpp_completion.json -------------------------------------------------------------------------------- /eval/Coding/mbpp/new_mbpp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/mbpp/new_mbpp.json -------------------------------------------------------------------------------- /eval/Coding/mbpp/result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Coding/mbpp/result.txt -------------------------------------------------------------------------------- /eval/Math/math/evaluate_math_cot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/math/evaluate_math_cot.py -------------------------------------------------------------------------------- /eval/Math/math/evaluate_math_pot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/math/evaluate_math_pot.py -------------------------------------------------------------------------------- /eval/Math/math/math_test_cleaned.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/math/math_test_cleaned.json -------------------------------------------------------------------------------- /eval/Math/math/results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/math/results.json -------------------------------------------------------------------------------- /eval/Math/math/results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/math/results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-res/asdiv_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-eurus-res/asdiv_results.json -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-res/asdiv_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-eurus-res/asdiv_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-res/total_results.txt: -------------------------------------------------------------------------------- 1 | asdiv Accuracy = 509/618 = 0.824 2 | -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-sft-res/SVAMP_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-eurus-sft-res/SVAMP_results.json -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-sft-res/SVAMP_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-eurus-sft-res/SVAMP_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-eurus-sft-res/total_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-eurus-sft-res/total_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res-sft/asdiv_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-res-sft/asdiv_results.json -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res-sft/asdiv_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-res-sft/asdiv_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res-sft/total_results.txt: -------------------------------------------------------------------------------- 1 | asdiv Accuracy = 1/618 = 0.002 2 | -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res/asdiv_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-res/asdiv_results.json -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res/asdiv_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/asdiv-res/asdiv_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/asdiv-res/total_results.txt: -------------------------------------------------------------------------------- 1 | asdiv Accuracy = 1/618 = 0.002 2 | -------------------------------------------------------------------------------- /eval/Math/subset/data/SVAMP.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/data/SVAMP.json -------------------------------------------------------------------------------- /eval/Math/subset/data/asdiv.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/data/asdiv.json -------------------------------------------------------------------------------- /eval/Math/subset/data/gsmplus_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/data/gsmplus_test.json -------------------------------------------------------------------------------- /eval/Math/subset/evaluate_subset_cot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/evaluate_subset_cot.py -------------------------------------------------------------------------------- /eval/Math/subset/evaluate_subset_pot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/evaluate_subset_pot.py -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-res/SVAMP_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-res/SVAMP_results.json -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-res/SVAMP_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-res/SVAMP_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-res/total_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-res/total_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-sft-res/SVAMP_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-sft-res/SVAMP_results.json -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-sft-res/SVAMP_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-sft-res/SVAMP_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-sft-res/asdiv_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-sft-res/asdiv_results.json -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-sft-res/asdiv_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-eurus-sft-res/asdiv_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-eurus-sft-res/total_results.txt: -------------------------------------------------------------------------------- 1 | asdiv Accuracy = 495/618 = 0.801 2 | -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res-sft/SVAMP_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res-sft/SVAMP_results.json -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res-sft/SVAMP_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res-sft/SVAMP_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res-sft/total_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res-sft/total_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res/SVAMP_results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res/SVAMP_results.json -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res/SVAMP_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res/SVAMP_results.txt -------------------------------------------------------------------------------- /eval/Math/subset/svamp-res/total_results.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/Math/subset/svamp-res/total_results.txt -------------------------------------------------------------------------------- /eval/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/requirements.txt -------------------------------------------------------------------------------- /eval/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/run.sh -------------------------------------------------------------------------------- /eval/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/data.cpython-310.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/data.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/evaluation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/evaluation.cpython-310.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/evaluation.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/evaluation.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/execution.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/execution.cpython-310.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/execution.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/execution.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/grader.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/grader.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/math_equivalence.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/math_equivalence.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/__pycache__/util.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/__pycache__/util.cpython-311.pyc -------------------------------------------------------------------------------- /eval/utils/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/data.py -------------------------------------------------------------------------------- /eval/utils/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/evaluation.py -------------------------------------------------------------------------------- /eval/utils/execution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/execution.py -------------------------------------------------------------------------------- /eval/utils/grader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/grader.py -------------------------------------------------------------------------------- /eval/utils/math_equivalence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/math_equivalence.py -------------------------------------------------------------------------------- /eval/utils/python_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/python_interpreter.py -------------------------------------------------------------------------------- /eval/utils/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/eval/utils/util.py -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/evaluate.py -------------------------------------------------------------------------------- /figures/framework.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/figures/framework.pdf -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/loss.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/requirements.txt -------------------------------------------------------------------------------- /tpo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrBlankness/TPO/HEAD/tpo_train.py --------------------------------------------------------------------------------