├── .gitignore
├── README.md
├── accelerate_configs
    ├── deepspeed_zero3.yaml
    └── deepspeed_zero3_cpu.yaml
├── app.py
├── bash_scrips
    ├── qwen2_72b_instruct_step_dpo.sh
    ├── qwen2_72b_step_dpo.sh
    └── qwen2_7b_step_dpo.sh
├── configs
    └── config_full.yaml
├── data
    └── test
    │   ├── GSM8K_test_data.jsonl
    │   └── MATH_test_data.jsonl
├── data_pipeline
    ├── generate_dataset.py
    ├── locate_error_by_gpt4.py
    ├── merge.sh
    ├── predictions
    │   └── sample.json
    ├── prepare_for_correction.py
    ├── step1.sh
    ├── step2.sh
    └── step3.sh
├── eval_math.py
├── eval_results
    ├── gsm8k
    │   └── sample.json
    └── math
    │   ├── qwen2-7b-dpo-v3-continue-from-incorrect-fix-part1-filtered+2-filtered+aqua-filtered-rej-original_acc0.6-topk1-beta0.5-8ep-fixbug-fixeos-bf16-keywords-fix.json
    │   └── sample.json
├── evaluation
    ├── data_processing
    │   ├── answer_extraction.py
    │   └── process_utils.py
    └── eval
    │   ├── eval_script.py
    │   ├── eval_utils.py
    │   ├── ocwcourses_eval_utils.py
    │   ├── python_executor.py
    │   └── utils.py
├── imgs
    ├── .DS_Store
    ├── coreidea.png
    ├── example1.png
    ├── example2.png
    ├── example3.png
    ├── example4.png
    ├── example5.jpg
    ├── summary.jpg
    └── triangle.png
├── licenses
    ├── DATA_LICENSE
    ├── LICENSE
    └── WEIGHT_LICENSE
├── paper
    └── paper.pdf
├── requirements.txt
├── stepdpo_trainer.py
└── train.py


/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__
2 | wandb/
3 | outputs/
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/README.md


--------------------------------------------------------------------------------
/accelerate_configs/deepspeed_zero3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/accelerate_configs/deepspeed_zero3.yaml


--------------------------------------------------------------------------------
/accelerate_configs/deepspeed_zero3_cpu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/accelerate_configs/deepspeed_zero3_cpu.yaml


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/app.py


--------------------------------------------------------------------------------
/bash_scrips/qwen2_72b_instruct_step_dpo.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/bash_scrips/qwen2_72b_instruct_step_dpo.sh


--------------------------------------------------------------------------------
/bash_scrips/qwen2_72b_step_dpo.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/bash_scrips/qwen2_72b_step_dpo.sh


--------------------------------------------------------------------------------
/bash_scrips/qwen2_7b_step_dpo.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/bash_scrips/qwen2_7b_step_dpo.sh


--------------------------------------------------------------------------------
/configs/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/configs/config_full.yaml


--------------------------------------------------------------------------------
/data/test/GSM8K_test_data.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data/test/GSM8K_test_data.jsonl


--------------------------------------------------------------------------------
/data/test/MATH_test_data.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data/test/MATH_test_data.jsonl


--------------------------------------------------------------------------------
/data_pipeline/generate_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/generate_dataset.py


--------------------------------------------------------------------------------
/data_pipeline/locate_error_by_gpt4.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/locate_error_by_gpt4.py


--------------------------------------------------------------------------------
/data_pipeline/merge.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/merge.sh


--------------------------------------------------------------------------------
/data_pipeline/predictions/sample.json:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data_pipeline/prepare_for_correction.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/prepare_for_correction.py


--------------------------------------------------------------------------------
/data_pipeline/step1.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/step1.sh


--------------------------------------------------------------------------------
/data_pipeline/step2.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/step2.sh


--------------------------------------------------------------------------------
/data_pipeline/step3.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/data_pipeline/step3.sh


--------------------------------------------------------------------------------
/eval_math.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/eval_math.py


--------------------------------------------------------------------------------
/eval_results/gsm8k/sample.json:
--------------------------------------------------------------------------------
1 | []


--------------------------------------------------------------------------------
/eval_results/math/qwen2-7b-dpo-v3-continue-from-incorrect-fix-part1-filtered+2-filtered+aqua-filtered-rej-original_acc0.6-topk1-beta0.5-8ep-fixbug-fixeos-bf16-keywords-fix.json:
--------------------------------------------------------------------------------
1 | [
2 |     
3 | ]


--------------------------------------------------------------------------------
/eval_results/math/sample.json:
--------------------------------------------------------------------------------
1 | []


--------------------------------------------------------------------------------
/evaluation/data_processing/answer_extraction.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/data_processing/answer_extraction.py


--------------------------------------------------------------------------------
/evaluation/data_processing/process_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/data_processing/process_utils.py


--------------------------------------------------------------------------------
/evaluation/eval/eval_script.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/eval/eval_script.py


--------------------------------------------------------------------------------
/evaluation/eval/eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/eval/eval_utils.py


--------------------------------------------------------------------------------
/evaluation/eval/ocwcourses_eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/eval/ocwcourses_eval_utils.py


--------------------------------------------------------------------------------
/evaluation/eval/python_executor.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/eval/python_executor.py


--------------------------------------------------------------------------------
/evaluation/eval/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/evaluation/eval/utils.py


--------------------------------------------------------------------------------
/imgs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/.DS_Store


--------------------------------------------------------------------------------
/imgs/coreidea.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/coreidea.png


--------------------------------------------------------------------------------
/imgs/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/example1.png


--------------------------------------------------------------------------------
/imgs/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/example2.png


--------------------------------------------------------------------------------
/imgs/example3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/example3.png


--------------------------------------------------------------------------------
/imgs/example4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/example4.png


--------------------------------------------------------------------------------
/imgs/example5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/example5.jpg


--------------------------------------------------------------------------------
/imgs/summary.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/summary.jpg


--------------------------------------------------------------------------------
/imgs/triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/imgs/triangle.png


--------------------------------------------------------------------------------
/licenses/DATA_LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/licenses/DATA_LICENSE


--------------------------------------------------------------------------------
/licenses/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/licenses/LICENSE


--------------------------------------------------------------------------------
/licenses/WEIGHT_LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/licenses/WEIGHT_LICENSE


--------------------------------------------------------------------------------
/paper/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/paper/paper.pdf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/requirements.txt


--------------------------------------------------------------------------------
/stepdpo_trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/stepdpo_trainer.py


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dvlab-research/Step-DPO/HEAD/train.py


--------------------------------------------------------------------------------