├── README.md ├── Untitled.ipynb ├── conf ├── api │ ├── gpt35turbo │ │ ├── ar_lsat │ │ │ └── dev_react_1shot_v1_0.yaml │ │ ├── logiqav2 │ │ │ └── dev_react_v1_0_1shot.yaml │ │ └── reclor │ │ │ ├── dev_react_v1_0_1shot.yaml │ │ │ └── train_react_v1_0_1shot_sample10.yaml │ ├── gpt4 │ │ ├── compare_response │ │ │ └── react_response_cmp_0shot_v1_0.yaml │ │ ├── logiqav2 │ │ │ └── dev_react_v1_0_1shot.yaml │ │ └── reclor │ │ │ └── dev_react_v1_0_1shot.yaml │ └── vllm │ │ ├── llama2-70b │ │ ├── ar_lsat │ │ │ ├── dev_react_1shot_v2_0.yaml │ │ │ ├── dev_react_v1_0.yaml │ │ │ └── dev_react_v1_1.yaml │ │ ├── logiqav2 │ │ │ ├── dev_0shot_v1_0_service.yaml │ │ │ └── react_dev_1shot_v1_0_service.yaml │ │ └── reclor │ │ │ ├── dev_0shot_v1_0_service.yaml │ │ │ ├── dev_react_1shot_v1_0.yaml │ │ │ └── train_react_1shot_sample5_v1_0.yaml │ │ ├── llama2-7b │ │ ├── ar_lsat_tems │ │ │ ├── dev_react_v1_0.yaml │ │ │ └── test_react_v1_0.yaml │ │ ├── folio_tems │ │ │ └── react_dev_0shot_tem_v1_0.yaml │ │ ├── logiqav2_qa_dev_react_dpo_v1_0.yaml │ │ ├── logiqav2_qa_dev_react_dpo_v2_0.yaml │ │ ├── logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml │ │ ├── logiqav2_qa_dev_react_dpo_v4_1.yaml │ │ ├── logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml │ │ ├── logiqav2_qa_dev_react_sft_v2_0.yaml │ │ ├── logiqav2_qa_dev_react_sft_v2_0_0shot.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v2_0.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v3_0.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v4_0.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v4_3.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml │ │ ├── logiqav2_qa_dev_react_v1_0.yaml │ │ ├── logiqav2_qa_react_0shot_tem_v1_0.yaml │ │ ├── logiqav2_qa_react_70bdistil_dpo_test_0shot_tem_v1_0.yaml │ │ ├── logiqav2_qa_react_70bdistil_step_dpo_test_0shot_tem_v1_0.yaml │ │ ├── logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml │ │ ├── logiqav2_qa_react_test_0shot_tem_v1_0.yaml │ │ ├── logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml │ │ ├── logiqav2_qa_react_turbosft_test_0shot_tem_v1_0_s0.yaml │ │ ├── logiqav2_qa_rest_train_react_v1_0_0shot_sample.yaml │ │ ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml │ │ ├── logiqav2_qa_sft70bdistil_dev_react_v1_0_1shot.yaml │ │ ├── logiqav2_qa_sft70bdistil_train_react_v1_0_0shot_sample20.yaml │ │ ├── logiqav2_qa_sft70bdistil_train_react_v1_0_inter_0shot_sample3.yaml │ │ ├── logiqav2_qa_train_react_sft_v2_0_1shot_sample20.yaml │ │ ├── logiqav2_qa_train_react_sft_v2_0_inter_0shot_sample3.yaml │ │ ├── logiqav2_qa_train_react_sft_v2_0_inter_1shot_sample3.yaml │ │ ├── logiqav2_tems │ │ │ ├── logiqav2_qa_train_react_v1_0_0shot_modify.yaml │ │ │ ├── logiqav2_qa_train_react_v1_0_1shot_modify.yaml │ │ │ ├── logiqav2_qa_train_react_v1_0_inter_0shot_modify.yaml │ │ │ ├── logiqav2_qa_train_react_v1_0_inter_0shot_sample3.yaml │ │ │ ├── logiqav2_qa_train_react_v1_0_inter_1shot_modify.yaml │ │ │ ├── react_dev_0shot_tem_v1_0.yaml │ │ │ ├── react_dev_0shot_tem_v1_0_o1.yaml │ │ │ ├── react_dev_0shot_tem_v1_0_o2.yaml │ │ │ ├── react_dev_0shot_tem_v2_0.yaml │ │ │ ├── react_dev_0shot_tem_v2_1.yaml │ │ │ ├── react_dev_0shot_tem_v2_1_sc.yaml │ │ │ ├── react_test_0shot_tem_v1_0.yaml │ │ │ ├── react_test_0shot_tem_v1_0_o1.yaml │ │ │ ├── react_test_0shot_tem_v1_0_o2.yaml │ │ │ ├── react_test_0shot_tem_v2_0.yaml │ │ │ ├── react_test_0shot_tem_v2_1.yaml │ │ │ ├── react_test_0shot_tem_v2_1_sc.yaml │ │ │ ├── react_test_1shot_tem_v2_1.yaml │ │ │ ├── react_train_0shot_sample_tem_v2_0.yaml │ │ │ ├── react_train_0shot_v1_0_sample_service.yaml │ │ │ └── react_train_inter_0shot_v1_0_sample3_service.yaml │ │ └── reclor_tems │ │ │ ├── dev_react_0shot_v1_0_service.yaml │ │ │ ├── dev_react_0shot_v1_0_vllm.yaml │ │ │ ├── dev_react_0shot_v1_1_vllm.yaml │ │ │ ├── dev_react_0shot_v1_1_vllm_sc.yaml │ │ │ ├── test_react_0shot_v1_0_vllm.yaml │ │ │ ├── test_react_0shot_v1_1_vllm.yaml │ │ │ ├── test_react_0shot_v1_1_vllm_sc.yaml │ │ │ ├── train_react_0shot_v1_0_sample_service.yaml │ │ │ ├── train_react_v1_0_inter_0shot_sample3.yaml │ │ │ └── train_react_v1_0_inter_0shot_sample3_vllm.yaml │ │ ├── logiqav2_qa_dev_decompose_dpo_v2_0.yaml │ │ ├── logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml │ │ ├── logiqav2_qa_dev_decompose_sft_v1_0.yaml │ │ ├── logiqav2_qa_dev_react_step_dpo_v1_0.yaml │ │ ├── math │ │ ├── gsm8k_deepseek_test_0shot_tem_v1_1.yaml │ │ ├── gsm8k_gemma_test_0shot_tem_v1_0.yaml │ │ ├── gsm8k_gemma_test_0shot_tem_v1_1.yaml │ │ ├── math_deepseek_test_0shot_tem_v1_1.yaml │ │ ├── math_gemma_test_0shot_tem_v1_0.yaml │ │ ├── math_gemma_test_0shot_tem_v1_1.yaml │ │ ├── meta_cot_train_0shot_completion_v1_0_sample.yaml │ │ ├── meta_cot_train_0shot_v1_0_sample.yaml │ │ ├── meta_deepseek_cot_train_0shot_completion_v1_0_sample.yaml │ │ ├── meta_deepseek_cot_train_0shot_v1_0_sample.yaml │ │ ├── meta_sub_rap_train_0shot_completion_v1_0_sample.yaml │ │ ├── meta_sub_rap_train_0shot_v1_0_sample.yaml │ │ ├── meta_sub_rap_train_1shot_v1_0_sample.yaml │ │ └── service │ │ │ ├── cot_train_0shot_completion_v1_0_sample.yaml │ │ │ └── cot_train_0shot_v1_0_sample.yaml │ │ ├── minicpm │ │ └── logiqav2 │ │ │ ├── react_dev_0shot_v1_0_service.yaml │ │ │ └── react_dev_1shot_v1_0_service.yaml │ │ ├── mistral │ │ ├── logiqav2 │ │ │ ├── dev_0shot_v1_0_service.yaml │ │ │ └── tems │ │ │ │ ├── react_test_1shot_tem_v1_0.yaml │ │ │ │ ├── react_test_1shot_tem_v1_1.yaml │ │ │ │ └── react_test_1shot_tem_v2_0.yaml │ │ └── reclor │ │ │ ├── dev_0shot_v1_0_service.yaml │ │ │ ├── dev_react_1shot_v1_0.yaml │ │ │ ├── train_react_1shot_sample5_split_v1_0.yaml │ │ │ └── train_react_1shot_sample5_v1_0.yaml │ │ ├── qwen │ │ └── logiqav2 │ │ │ ├── react_dev_0shot_v1_0_service.yaml │ │ │ └── react_dev_1shot_v1_0_service.yaml │ │ └── vllm_params │ │ ├── sampling_param_greedy.yaml │ │ └── sampling_param_sample.yaml ├── deepspeed │ ├── train_hybrid_engine_zero1.yaml │ ├── train_hybrid_engine_zero1_lr.yaml │ ├── train_hybrid_engine_zero1_optim_offload.yaml │ ├── train_hybrid_engine_zero1_optim_offload_cosine.yaml │ ├── train_hybrid_engine_zero1_optim_offload_lr.yaml │ ├── train_hybrid_engine_zero2.yaml │ ├── train_hybrid_engine_zero2_lr.yaml │ ├── train_hybrid_engine_zero2_optim_offload.yaml │ └── train_hybrid_engine_zero3.yaml ├── engines │ └── remax_defaults.yaml ├── exp │ ├── dpo │ │ ├── deepseek │ │ │ └── meta_math_cot │ │ │ │ ├── dpo_v5_0.yaml │ │ │ │ └── step_dpo_v5_1.yaml │ │ ├── gemma │ │ │ └── meta_math_rap │ │ │ │ ├── dpo_v1_0.yaml │ │ │ │ ├── dpo_v1_1.yaml │ │ │ │ ├── dpo_v1_1_1.yaml │ │ │ │ ├── dpo_v1_1_a100_40.yaml │ │ │ │ ├── dpo_v2_0.yaml │ │ │ │ ├── step_dpo_v1_0.yaml │ │ │ │ ├── step_dpo_v1_0_1.yaml │ │ │ │ ├── step_dpo_v1_1.yaml │ │ │ │ ├── step_dpo_v1_1_1.yaml │ │ │ │ ├── step_dpo_v1_1_a6k.yaml │ │ │ │ ├── step_dpo_v1_2.yaml │ │ │ │ ├── step_dpo_v1_2_1.yaml │ │ │ │ ├── step_dpo_v1_2_2.yaml │ │ │ │ ├── step_dpo_v1_2_a6k.yaml │ │ │ │ ├── step_dpo_v1_3.yaml │ │ │ │ ├── step_dpo_v1_4.yaml │ │ │ │ ├── step_dpo_v1_5.yaml │ │ │ │ ├── step_dpo_v1_6.yaml │ │ │ │ ├── step_dpo_v1_6_1.yaml │ │ │ │ ├── step_dpo_v2_0.yaml │ │ │ │ ├── step_dpo_v2_1.yaml │ │ │ │ └── step_dpo_v2_2.yaml │ │ ├── logiqav2 │ │ │ ├── llama2_70b_step_dpo_v1_0_th.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_iter1_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_A40w8.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_th.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_th_test.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_th_test_ratio.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_w2.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1_th.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1_w2.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1_w4.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1_w8.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_ipo_v1_0_th_test.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_10.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_11.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_2_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_2_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_2_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_2_replay.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_5_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_6.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_8_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v2_9.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_6.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v3_7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v4_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v4_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v5_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v5_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v6_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v6_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_iter1_v6_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_raw_v2_0_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_raw_v2_1_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_1_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_3_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_4_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_4_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_iter1_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_th_margin3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_th_margin7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_th_ratio.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_th_w2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0_w2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1_iter1_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1_th_a40w8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1_th_w2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_iter1_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_th_a40w8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_w4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_6.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_9.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_0_sum_th.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_1_sum_th.yaml │ │ │ └── llama2_7b_70bdistil_step_ipo_v1_0_th.yaml │ │ ├── out-of-date │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_0_4k.yaml │ │ │ ├── llama2_7b_70bdistil_dpo_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_10.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_2_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_3_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_4_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_4_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_6.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_8_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v1_9.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v2_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v2_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v2_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v2_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v2_3_prefix.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v3_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v4_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v4_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v5_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v5_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v6_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_0.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_10.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_11.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_4.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_4_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_5.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_6.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_7.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_8.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9_1.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9_2.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9_3.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9_3_h100.yaml │ │ │ ├── llama2_7b_70bdistil_step_dpo_v7_9_4.yaml │ │ │ ├── llama2_7b_distil_dpo_v1_0.yaml │ │ │ ├── llama2_7b_distil_dpo_v1_1.yaml │ │ │ ├── llama2_7b_distil_dpo_v1_2.yaml │ │ │ ├── llama2_7b_distil_dpo_v2_0.yaml │ │ │ ├── llama2_7b_dpo_v1_0.yaml │ │ │ ├── llama2_7b_dpo_v2_0.yaml │ │ │ ├── llama2_7b_dpo_v3_0.yaml │ │ │ ├── llama2_7b_dpo_v4_0.yaml │ │ │ ├── llama2_7b_dpo_v4_1.yaml │ │ │ ├── llama2_7b_step_dpo_v1_0.yaml │ │ │ ├── llama2_7b_step_dpo_v1_1.yaml │ │ │ ├── llama2_7b_step_dpo_v2_0.yaml │ │ │ ├── llama2_7b_step_dpo_v3_0.yaml │ │ │ ├── llama2_7b_step_dpo_v4_0.yaml │ │ │ ├── llama2_7b_step_dpo_v4_1.yaml │ │ │ ├── llama2_7b_step_dpo_v4_1_1.yaml │ │ │ ├── llama2_7b_step_dpo_v4_2.yaml │ │ │ ├── llama2_7b_step_dpo_v4_2_fix.yaml │ │ │ ├── llama2_7b_step_dpo_v4_3.yaml │ │ │ ├── llama2_7b_step_dpo_v5_0.yaml │ │ │ ├── llama2_7b_step_dpo_v5_1.yaml │ │ │ ├── llama2_7b_step_dpo_v5_2.yaml │ │ │ ├── llama2_7b_step_dpo_v6_0.yaml │ │ │ ├── llama2_7b_step_dpo_v6_1.yaml │ │ │ ├── llama2_7b_step_dpo_v6_2.yaml │ │ │ ├── llama2_7b_step_dpo_v6_3.yaml │ │ │ ├── llama2_7b_step_dpo_v6_4.yaml │ │ │ ├── llama2_7b_step_dpo_v6_5.yaml │ │ │ └── llama2_7b_step_dpo_v6_5_1.yaml │ │ └── reclor │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v1_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v2_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v3_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v3_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v3_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v3_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v3_4.yaml │ │ │ ├── llama2_7b_gpt351106_distil_dpo_v4_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_4.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_5.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_6.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_7.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_8.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v1_9.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v2_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v2_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v3_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v4_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v4_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_0.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_0_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3_4.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_3_5.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_5.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_6.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_4_7.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_5.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_5_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_5_2.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_5_3.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_6.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v5_6_1.yaml │ │ │ ├── llama2_7b_gpt351106_distil_step_dpo_v6_0.yaml │ │ │ ├── llama2_7b_mixtral_distil_dpo_v1_0.yaml │ │ │ ├── llama2_7b_mixtral_distil_dpo_v2_0.yaml │ │ │ ├── llama2_7b_mixtral_distil_dpo_v2_0_test.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_0.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_1.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_2.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_2_1.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_2_2.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_2_3.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_2_4.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_3.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_3_1.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_3_2.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_4.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_4_1.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_4_2.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_4_3.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v2_4_4.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_0.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_1.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_2.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_3.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_4.yaml │ │ │ ├── llama2_7b_mixtral_distil_step_dpo_v3_5.yaml │ │ │ ├── llama2_7b_mixtral_step_dpo_v1_0.yaml │ │ │ └── logiqa_pretrain │ │ │ ├── llama2_7b_lqv2_step_dpo_dpo_v1_0.yaml │ │ │ ├── llama2_7b_lqv2_step_dpo_step_dpo_v1_0.yaml │ │ │ └── llama2_7b_lqv2_step_dpo_step_dpo_v1_1.yaml │ ├── grpo │ │ ├── logiqav2_prm_v1_0.yaml │ │ ├── logiqav2_prm_v2_0.yaml │ │ └── logiqav2_prm_v2_1.yaml │ ├── ppo │ │ ├── logiqav2_prm_v1_0_test.yaml │ │ ├── logiqav2_prm_v1_1_test.yaml │ │ ├── logiqav2_prm_v2_0.yaml │ │ ├── logiqav2_prm_v2_1.yaml │ │ ├── logiqav2_prm_v2_2.yaml │ │ ├── logiqav2_prm_v2_3.yaml │ │ ├── logiqav2_prm_v2_4.yaml │ │ ├── logiqav2_prm_v2_5.yaml │ │ ├── logiqav2_prm_v2_6.yaml │ │ ├── logiqav2_prm_v3_0.yaml │ │ ├── logiqav2_prm_v4_0.yaml │ │ ├── logiqav2_prm_v4_1.yaml │ │ └── logiqav2_prm_v4_2.yaml │ ├── remax │ │ ├── v1_0.yaml │ │ ├── v1_2.yaml │ │ ├── v1_3.yaml │ │ ├── v1_4.yaml │ │ ├── v1_5.yaml │ │ ├── v1_6.yaml │ │ └── v1_7.yaml │ ├── reward │ │ ├── combine │ │ │ └── llama2_7b_70bdistil_prm_v1_0.yaml │ │ ├── deepseek │ │ │ └── meta_math_cot │ │ │ │ ├── prm_v1_0.yaml │ │ │ │ ├── prm_v1_0_predict.yaml │ │ │ │ ├── prm_v1_1.yaml │ │ │ │ ├── prm_v1_1_predict.yaml │ │ │ │ ├── prm_v1_2.yaml │ │ │ │ ├── prm_v1_2_predict.yaml │ │ │ │ ├── prm_v2_0.yaml │ │ │ │ ├── prm_v2_1.yaml │ │ │ │ └── prm_v2_1_predict.yaml │ │ ├── gemma │ │ │ └── meta_math_rap │ │ │ │ ├── prm_v1_0.yaml │ │ │ │ ├── prm_v1_0_predict.yaml │ │ │ │ ├── prm_v1_1_gsm8k.yaml │ │ │ │ ├── prm_v1_1_math.yaml │ │ │ │ ├── prm_v1_2_gsm8k.yaml │ │ │ │ ├── prm_v1_2_gsm8k_predict.yaml │ │ │ │ ├── prm_v1_2_math.yaml │ │ │ │ └── prm_v1_2_math_predict.yaml │ │ ├── logiqav2 │ │ │ ├── llama2_7b_70bdistil_orm_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_orm_v1_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_orm_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_orm_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_orm_v1_2_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_iter1_replay_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_prm_iter1_replay_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_iter1_replay_v1_1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_iter1_replay_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_iter1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_predict_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_predict_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_predict_v1_3.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_0_predict_v1_4.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_1_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_1_iter1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_2_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_2_iter1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_3_iter1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_3_iter1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v2_0.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v2_0_predict.yaml │ │ │ └── llama2_7b_70bdistil_prm_v2_0_predict_reclor.yaml │ │ ├── out-of-date │ │ │ ├── llama2_7b_70bdistil_prm_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_1.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_1_predict.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_2_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_full_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_full_v1_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_full_v1_0_predict_v11.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v1_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v1_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v1_0_predict_v11.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v2_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v2_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v3_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v4_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v4_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v5_0.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v5_0_predict.yaml │ │ │ ├── llama2_7b_70bdistil_rm_v5_1.yaml │ │ │ ├── llama2_7b_rm_v1_0.yaml │ │ │ └── mistral_7b_rm_v1_0.yaml │ │ └── reclor │ │ │ ├── llama2_7b_70bdistil_prm_v1_2.yaml │ │ │ ├── llama2_7b_70bdistil_prm_v1_2_predict.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_0.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_0_predict.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_1.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_1_predict_logiqav2.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_2.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_3.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_3_predict.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_4.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v1_4_predict.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v2_0.yaml │ │ │ ├── llama2_7b_gpt351106_prm_v2_0_predict.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_0.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_0_predict.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_0_predict_v1_1.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_1.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_2.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_2_predict.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_3.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_4.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_4_1.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_4_predict.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_5.yaml │ │ │ ├── llama2_7b_mixtral_prm_v1_5_predict.yaml │ │ │ └── llama2_7b_mixtral_prm_v1_5_predict_v1_1.yaml │ └── sft │ │ ├── llama2_7b_70bdistil_rs_sft_v1_0.yaml │ │ ├── llama2_7b_70bdistil_self_sft_v1_0.yaml │ │ ├── llama2_7b_distil_dpo_sft_v1_0.yaml │ │ ├── llama2_7b_gpt35turbo_dpo_sft_v1_0.yaml │ │ ├── llama2_7b_gpt35turbo_dpo_sft_v1_1.yaml │ │ ├── llama2_7b_gpt35turbo_dpo_sft_v2_0.yaml │ │ ├── llama2_7b_gpt35turbo_dpo_sft_v2_1.yaml │ │ ├── llama2_7b_gpt35turbo_dpo_sft_v2_1_gpu_test.yaml │ │ ├── llama2_7b_llama2-70b-chat_dpo_sft_v1_0.yaml │ │ ├── llama2_7b_llama2-70b-chat_dpo_sft_v1_0_tp_test.yaml │ │ ├── logiqav2 │ │ ├── gemma_2b_llama2-70b-chat_dpo_sft_v1_0.yaml │ │ ├── llama2_7b_70bdistil_prm_rft_v1_0.yaml │ │ ├── llama2_7b_70bdistil_rft_v1_0.yaml │ │ ├── llama2_7b_70bdistil_rft_v1_1.yaml │ │ ├── llama2_7b_70bdistil_self_sft_v1_0.yaml │ │ └── llama2_7b_70bdistil_self_sft_v2_0.yaml │ │ └── reclor │ │ ├── llama2_7b_gpt35_dpo_sft_v1_0.yaml │ │ ├── llama2_7b_gpt35_dpo_sft_v2_0.yaml │ │ └── llama2_7b_mixtral_dpo_sft_v1_0.yaml ├── hydra │ └── default.yaml ├── post_process │ └── openai_react.yaml ├── reader │ ├── logiqav2 │ │ ├── react_service_0shot_v1_0.yaml │ │ └── react_service_1shot_v1_0.yaml │ └── reclor │ │ ├── react_service_0shot_v1_0.yaml │ │ └── react_service_1shot_v1_0.yaml └── test.yaml ├── data ├── ar_lsat.py ├── deepseek_math_utils │ ├── answer_extraction.py │ ├── eval_script.py │ ├── eval_utils.py │ └── ocwcourses_eval_utils.py ├── dpo.py ├── folio.py ├── general.py ├── input_aligner.py ├── logic_combine.py ├── logiqav2.py ├── math.py ├── math_util.py ├── meta_math │ ├── react_prompt_0.txt │ └── react_prompt_1.txt ├── mini_dataset.py ├── openai_api_caller.py ├── prompts │ ├── ar_lsat │ │ └── react │ │ │ └── train_200006_1-G_1_1.txt │ ├── folio │ │ └── react │ │ │ ├── train_1131.txt │ │ │ └── train_1131_1.txt │ └── logiqav2 │ │ ├── compare_response │ │ ├── prompt_0.txt │ │ └── template_th_0.txt │ │ ├── decomposition │ │ ├── gpt4 │ │ │ └── dev_10741_0.md │ │ ├── human │ │ │ └── dev_218_0.md │ │ ├── llama-2-70b-chat │ │ │ └── dev_218_0.md │ │ └── structurize │ │ │ └── llama-2-70b-chat │ │ │ ├── prompts_0.md │ │ │ ├── prompts_1.md │ │ │ └── prompts_2.md │ │ ├── gpt4 │ │ └── dev_218_0.md │ │ ├── logic_form │ │ ├── gpt4 │ │ │ └── dev_218_0.md │ │ ├── human │ │ │ ├── dev_218_0.md │ │ │ ├── dev_218_0_sim.md │ │ │ ├── dev_7261_0.md │ │ │ └── dev_7261_0_sim.md │ │ └── prompt_0.md │ │ ├── modify │ │ ├── train_27.txt │ │ └── train_27_full.txt │ │ └── react │ │ ├── prompts.py │ │ ├── train_4554.txt │ │ └── train_4554_p1.txt ├── reclor.py └── vllm.py ├── general_util ├── __init__.py ├── average_meter.py ├── dist_utils.py ├── evaluator.py ├── fsdp_utils.py ├── lightseq_utils.py ├── logger.py ├── mixin.py ├── tensorboard_helper.py ├── tokenization_utils.py ├── torch_fsdp_utils.py ├── training_utils.py └── transformer_engine.py ├── lora_merge.py ├── lora_share_trainer ├── grpo_engine.py ├── lora_share_remax_engine.py ├── ppo_engine.py ├── remax_trainer.py └── utils │ ├── ds_utils.py │ ├── fp8.py │ ├── post_process.py │ └── utils.py ├── main.py ├── models ├── gemma.py ├── llama.py ├── llama_tp.py ├── megatron_llama.py ├── mistral.py ├── phi.py ├── reward_model_mixin.py ├── string_rule_reward.py └── utils.py ├── openai_api_caller_v1.py ├── post_processors ├── dist_mixin.py ├── dpo.py └── openai_api_callback.py ├── requirements.txt ├── run_service.sh ├── run_step_dpo.sh ├── scripts ├── best_of_filter_by_reward_v1.0.py ├── best_of_filter_by_reward_v1.1.py ├── best_of_filter_by_reward_v1.2.py ├── best_of_filter_by_reward_v1.3.py ├── best_of_filter_by_reward_v2.1.py ├── best_of_filter_by_reward_v2.2.1.py ├── best_of_filter_by_reward_v2.2.py ├── best_of_filter_by_reward_v2.2_pos_only.py ├── best_of_filter_by_reward_v2.2_topk.py ├── best_of_filter_by_reward_v2.4.py ├── best_of_filter_by_reward_v2.5.py ├── best_of_filter_by_reward_v2.6.py ├── best_of_filter_by_reward_v2.7.py ├── best_of_filter_by_reward_v3.0.py ├── calculate_acc_w_clean.py ├── calculate_react_acc_w_clean.py ├── check_rewards_v1.0.py ├── combine_reward_debug_v1.0.py ├── combine_worsen_response.py ├── construct_dpo_data_from_react_response.py ├── construct_dpo_data_from_react_response_v1.1.py ├── construct_dpo_data_from_response.py ├── construct_dpo_data_via_step_value_v1.1.py ├── construct_dpo_data_via_step_value_v1.py ├── construct_dpo_data_via_step_value_v2.0.py ├── construct_dpo_data_via_worsen_response.py ├── cot │ ├── collect_rejection_sampling.py │ ├── cot_clean.py │ ├── cot_step_accumulate.py │ ├── deepseek_clean.py │ ├── deepseek_cot_sample_steps.py │ ├── deepseek_dpo_pair_by_reward.sh │ ├── deepseek_dpo_pair_by_reward_v1.0.py │ ├── deepseek_dpo_pair_by_reward_v4.0.sh │ ├── deepseek_dpo_pair_by_reward_v5.0.sh │ ├── dpo_pair_by_reward.sh │ ├── dpo_pair_by_reward_v1.0.py │ ├── meta_math_dpo_pair_by_reward_v1.0.py │ ├── rap_clean.py │ ├── rap_fix_pred.py │ ├── rap_step_accumulate.py │ └── step_contrastive.py ├── deepspeed │ └── ds_full_checkpoint2hf.py ├── explore_from_inter │ ├── best_of_filter_full.sh │ ├── combine_worsen_response.sh │ ├── reclor │ │ └── best_of_filter_full.sh │ ├── reject_sampling_best_of.sh │ ├── run_llama_sft_v2.0.sh │ ├── run_llama_v2.0.sh │ ├── run_llama_v2.0_reward.sh │ ├── run_llama_v3.0.sh │ └── run_v2.0.sh ├── filter_dpo_pair_by_predict_reward.py ├── filter_dpo_pair_by_predict_reward_v2.0.py ├── fixed_explore_from_infer │ ├── logiqav2 │ │ ├── best_of_filter_full.sh │ │ ├── best_of_filter_full_history.log │ │ ├── best_of_filter_full_iter1.sh │ │ ├── best_of_filter_full_prm_v2.0.sh │ │ ├── best_of_filter_full_v2.7.sh │ │ ├── best_of_filter_full_v3.0.sh │ │ ├── check_rewards.sh │ │ ├── construct_dpo.sh │ │ ├── reward_eval.sh │ │ └── split_pair.sh │ ├── readme.md │ └── reclor │ │ ├── best_of_filter_full.sh │ │ ├── best_of_filter_gpt35.sh │ │ ├── best_of_filter_gpt35_no_action.sh │ │ ├── best_of_filter_gpt35_resp30merge.sh │ │ ├── best_of_filter_logiqav2.sh │ │ └── construct_dpo.sh ├── inference │ ├── calibra_results.sh │ ├── calibra_results_logiqav2_sc.sh │ ├── calibra_results_reclor_sc.sh │ ├── calibra_results_reclor_v1.1.sh │ ├── run_query_ar_lsat_vllm_v2.1.sh │ ├── run_query_folio_vllm.sh │ ├── run_query_gsm8k_vllm_v1.0.sh │ ├── run_query_gsm8k_vllm_v1.1.sh │ ├── run_query_logiqav2_dev_order.sh │ ├── run_query_logiqav2_order.sh │ ├── run_query_logiqav2_vllm.sh │ ├── run_query_logiqav2_vllm_reclor.sh │ ├── run_query_logiqav2_vllm_reclor_v1.1.sh │ ├── run_query_logiqav2_vllm_sc_v2.1.sh │ ├── run_query_logiqav2_vllm_v2.1.sh │ ├── run_query_math_vllm_v1.0.sh │ ├── run_query_math_vllm_v1.1.sh │ └── run_query_reclor_vllm_sc_v1.1.sh ├── line-scale.py ├── merge_response.py ├── model_converts │ └── llama_hf_mp_split.py ├── process_inter.sh ├── process_inter_response.py ├── process_inter_response_v2.0.py ├── process_inter_response_v2.1.py ├── process_inter_response_v2.1_by_reward.py ├── process_inter_response_v2.2.py ├── process_inter_response_v2.3.py ├── process_inter_response_v2.3_by_rewards.py ├── process_inter_response_v2.4.py ├── process_inter_response_v2.5.py ├── process_inter_response_v2.6.py ├── process_inter_response_v3.0.py ├── process_inter_response_v3.1.py ├── process_inter_response_v3.2.py ├── process_llama.sh ├── process_react_nodes.py ├── process_sft.sh ├── process_sft_inter.sh ├── process_sft_v2.sh ├── process_turbo.sh ├── react_step_union_find.py ├── react_step_union_find_v2.py ├── reject_sample_best_of_filter_by_reward_v1.0.py ├── sample_react_inter_states.py ├── sample_react_inter_states_v2.0.py ├── sample_react_inter_states_v2.1.py ├── sample_react_inter_states_v2.2.py ├── sample_react_inter_states_v2.3.py ├── sent_tf_react_step_encoding.py ├── split_pairs_according_to_ids.py ├── split_response_train_dev_according2item_id.py ├── split_train_dev.py └── win_rate_cmp_v1.0.py ├── service_api_caller_v1.py ├── service_api_caller_v2.py ├── test.py ├── test.sh ├── test_deepspeed.py ├── trainer_base_ds_mul.py ├── trainer_base_ds_mul_fs_tp.py ├── trainer_base_ds_mul_te.py ├── vllm_inference.py └── write_deployment_to_cache.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/README.md -------------------------------------------------------------------------------- /Untitled.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/Untitled.ipynb -------------------------------------------------------------------------------- /conf/api/gpt35turbo/ar_lsat/dev_react_1shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt35turbo/ar_lsat/dev_react_1shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/gpt35turbo/logiqav2/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt35turbo/logiqav2/dev_react_v1_0_1shot.yaml -------------------------------------------------------------------------------- /conf/api/gpt35turbo/reclor/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt35turbo/reclor/dev_react_v1_0_1shot.yaml -------------------------------------------------------------------------------- /conf/api/gpt35turbo/reclor/train_react_v1_0_1shot_sample10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt35turbo/reclor/train_react_v1_0_1shot_sample10.yaml -------------------------------------------------------------------------------- /conf/api/gpt4/compare_response/react_response_cmp_0shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt4/compare_response/react_response_cmp_0shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/gpt4/logiqav2/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt4/logiqav2/dev_react_v1_0_1shot.yaml -------------------------------------------------------------------------------- /conf/api/gpt4/reclor/dev_react_v1_0_1shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/gpt4/reclor/dev_react_v1_0_1shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_1shot_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/ar_lsat/dev_react_1shot_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/ar_lsat/dev_react_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/logiqav2/dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/logiqav2/dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/logiqav2/react_dev_1shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/logiqav2/react_dev_1shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/reclor/dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/reclor/dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/reclor/dev_react_1shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/reclor/dev_react_1shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-70b/reclor/train_react_1shot_sample5_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-70b/reclor/train_react_1shot_sample5_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/ar_lsat_tems/dev_react_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/ar_lsat_tems/dev_react_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/ar_lsat_tems/test_react_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/ar_lsat_tems/test_react_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/folio_tems/react_dev_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/folio_tems/react_dev_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v2_0_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_dpo_v4_1_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_sft_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_sft_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_sft_v2_0_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_sft_v2_0_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v2_0_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v4_3.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_0_fix_1shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_step_dpo_v5_1_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_dev_react_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_react_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_react_dpo_test_0shot_tem_v1_0_s0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_react_test_0shot_tem_v1_0_s0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_qa_sft70bdistil_dev_react_v1_0_0shot.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v1_0_o2.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1_sc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_dev_0shot_tem_v2_1_sc.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v1_0_o2.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1_sc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_0shot_tem_v2_1_sc.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/logiqav2_tems/react_test_1shot_tem_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/logiqav2_tems/react_test_1shot_tem_v2_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_0_vllm.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm_sc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/dev_react_0shot_v1_1_vllm_sc.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_0_vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_0_vllm.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm.yaml -------------------------------------------------------------------------------- /conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm_sc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/llama2-7b/reclor_tems/test_react_0shot_v1_1_vllm_sc.yaml -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/logiqav2_qa_dev_decompose_dpo_v2_0_cp1800.yaml -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_decompose_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/logiqav2_qa_dev_decompose_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/logiqav2_qa_dev_react_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/logiqav2_qa_dev_react_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_deepseek_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/gsm8k_deepseek_test_0shot_tem_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/gsm8k_gemma_test_0shot_tem_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/math_deepseek_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/math_deepseek_test_0shot_tem_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/math_gemma_test_0shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/math_gemma_test_0shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/math_gemma_test_0shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/math_gemma_test_0shot_tem_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_cot_train_0shot_completion_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_cot_train_0shot_completion_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_cot_train_0shot_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_cot_train_0shot_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_deepseek_cot_train_0shot_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_deepseek_cot_train_0shot_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_sub_rap_train_0shot_completion_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_sub_rap_train_0shot_completion_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_sub_rap_train_0shot_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_sub_rap_train_0shot_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/meta_sub_rap_train_1shot_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/meta_sub_rap_train_1shot_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/service/cot_train_0shot_completion_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/service/cot_train_0shot_completion_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/math/service/cot_train_0shot_v1_0_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/math/service/cot_train_0shot_v1_0_sample.yaml -------------------------------------------------------------------------------- /conf/api/vllm/minicpm/logiqav2/react_dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/minicpm/logiqav2/react_dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/minicpm/logiqav2/react_dev_1shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/minicpm/logiqav2/react_dev_1shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/logiqav2/dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v1_1.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/logiqav2/tems/react_test_1shot_tem_v2_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/reclor/dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/dev_react_1shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/reclor/dev_react_1shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/train_react_1shot_sample5_split_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/reclor/train_react_1shot_sample5_split_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/mistral/reclor/train_react_1shot_sample5_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/mistral/reclor/train_react_1shot_sample5_v1_0.yaml -------------------------------------------------------------------------------- /conf/api/vllm/qwen/logiqav2/react_dev_0shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/qwen/logiqav2/react_dev_0shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/qwen/logiqav2/react_dev_1shot_v1_0_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/qwen/logiqav2/react_dev_1shot_v1_0_service.yaml -------------------------------------------------------------------------------- /conf/api/vllm/vllm_params/sampling_param_greedy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/vllm_params/sampling_param_greedy.yaml -------------------------------------------------------------------------------- /conf/api/vllm/vllm_params/sampling_param_sample.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/api/vllm/vllm_params/sampling_param_sample.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero1.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_lr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero1_lr.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero1_optim_offload.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload_cosine.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero1_optim_offload_cosine.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero1_optim_offload_lr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero1_optim_offload_lr.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero2.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2_lr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero2_lr.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero2_optim_offload.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero2_optim_offload.yaml -------------------------------------------------------------------------------- /conf/deepspeed/train_hybrid_engine_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/deepspeed/train_hybrid_engine_zero3.yaml -------------------------------------------------------------------------------- /conf/engines/remax_defaults.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/exp/dpo/deepseek/meta_math_cot/dpo_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/deepseek/meta_math_cot/dpo_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/deepseek/meta_math_cot/step_dpo_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/deepseek/meta_math_cot/step_dpo_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1_a100_40.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/dpo_v1_1_a100_40.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_0_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_0_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1_a6k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_1_a6k.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_a6k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_2_a6k.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_6_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v1_6_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/gemma/meta_math_rap/step_dpo_v2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_70b_step_dpo_v1_0_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_70b_step_dpo_v1_0_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_iter1_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_iter1_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_A40w8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_A40w8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th_test.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th_test_ratio.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_th_test_ratio.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_w2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_0_w2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_1_w8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_ipo_v1_0_th_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_ipo_v1_0_th_test.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_10.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_11.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_11.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_8_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_8_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_9.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v2_9.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v3_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_iter1_v6_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_raw_v2_0_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_raw_v2_0_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_raw_v2_1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_raw_v2_1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_4_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_iter1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_iter1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_margin3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_margin3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_margin7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_margin7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_ratio.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_ratio.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_w2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_th_w2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_w2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_0_w2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_iter1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_iter1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th_a40w8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th_a40w8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th_w2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_1_th_w2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_iter1_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_iter1_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_th_a40w8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_th_a40w8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_w4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_2_w4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_9.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v1_9.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v3_0_sum_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v3_0_sum_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v3_1_sum_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_dpo_v3_1_sum_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_ipo_v1_0_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/logiqav2/llama2_7b_70bdistil_step_ipo_v1_0_th.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_0_4k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_0_4k.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_10.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_2_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_4_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_8_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_8_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_9.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v1_9.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_3_prefix.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v2_3_prefix.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v3_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v6_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_10.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_11.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_11.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_3_h100.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_3_h100.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_70bdistil_step_dpo_v7_9_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_distil_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_dpo_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_dpo_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_2_fix.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_2_fix.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v4_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v5_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/out-of-date/llama2_7b_step_dpo_v6_5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v3_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_8.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_9.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v1_9.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_0_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_0_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_3_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_4_7.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_5_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_6.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_6_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v5_6_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v6_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_gpt351106_distil_step_dpo_v6_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v2_0_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_dpo_v2_0_test.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_2_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v2_4_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_1.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_2.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_3.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_4.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_distil_step_dpo_v3_5.yaml -------------------------------------------------------------------------------- /conf/exp/dpo/reclor/llama2_7b_mixtral_step_dpo_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/dpo/reclor/llama2_7b_mixtral_step_dpo_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/grpo/logiqav2_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/grpo/logiqav2_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/grpo/logiqav2_prm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/grpo/logiqav2_prm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/grpo/logiqav2_prm_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/grpo/logiqav2_prm_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v1_0_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v1_0_test.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v1_1_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v1_1_test.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_2.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_3.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_4.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_5.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v2_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v2_6.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v4_1.yaml -------------------------------------------------------------------------------- /conf/exp/ppo/logiqav2_prm_v4_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/ppo/logiqav2_prm_v4_2.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_6.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_6.yaml -------------------------------------------------------------------------------- /conf/exp/remax/v1_7.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/remax/v1_7.yaml -------------------------------------------------------------------------------- /conf/exp/reward/combine/llama2_7b_70bdistil_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/combine/llama2_7b_70bdistil_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_1_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_1_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v1_2_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v1_2_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/deepseek/meta_math_cot/prm_v2_1_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/deepseek/meta_math_cot/prm_v2_1_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_1_gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_1_gsm8k.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_1_math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_1_math.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_2_gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_2_gsm8k.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_2_gsm8k_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_2_gsm8k_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_2_math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_2_math.yaml -------------------------------------------------------------------------------- /conf/exp/reward/gemma/meta_math_rap/prm_v1_2_math_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/gemma/meta_math_rap/prm_v1_2_math_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_2_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_orm_v1_2_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_iter1_replay_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_0_predict_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_1_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_1_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_2_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_2_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_3_iter1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v1_3_iter1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v2_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/logiqav2/llama2_7b_70bdistil_prm_v2_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_1_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_1_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_2_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_prm_v1_2_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_full_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_full_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v2_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v2_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v3_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v3_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v4_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v4_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v4_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v4_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_70bdistil_rm_v5_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/llama2_7b_rm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/llama2_7b_rm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/out-of-date/mistral_7b_rm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/out-of-date/mistral_7b_rm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_70bdistil_prm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_70bdistil_prm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_70bdistil_prm_v1_2_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_70bdistil_prm_v1_2_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_3_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_3_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_4_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v1_4_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v2_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_gpt351106_prm_v2_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0_predict_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_0_predict_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_2.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_2_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_2_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_3.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4_1.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_4_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5_predict.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5_predict.yaml -------------------------------------------------------------------------------- /conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5_predict_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/reward/reclor/llama2_7b_mixtral_prm_v1_5_predict_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_70bdistil_rs_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_70bdistil_rs_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_70bdistil_self_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_70bdistil_self_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_distil_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_distil_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_1.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_1_gpu_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_gpt35turbo_dpo_sft_v2_1_gpu_test.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_llama2-70b-chat_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_llama2-70b-chat_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/llama2_7b_llama2-70b-chat_dpo_sft_v1_0_tp_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/llama2_7b_llama2-70b-chat_dpo_sft_v1_0_tp_test.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/gemma_2b_llama2-70b-chat_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/gemma_2b_llama2-70b-chat_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/llama2_7b_70bdistil_prm_rft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/llama2_7b_70bdistil_prm_rft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/llama2_7b_70bdistil_rft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/llama2_7b_70bdistil_rft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/llama2_7b_70bdistil_rft_v1_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/llama2_7b_70bdistil_rft_v1_1.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/llama2_7b_70bdistil_self_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/llama2_7b_70bdistil_self_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/logiqav2/llama2_7b_70bdistil_self_sft_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/logiqav2/llama2_7b_70bdistil_self_sft_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/reclor/llama2_7b_gpt35_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/reclor/llama2_7b_gpt35_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/reclor/llama2_7b_gpt35_dpo_sft_v2_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/reclor/llama2_7b_gpt35_dpo_sft_v2_0.yaml -------------------------------------------------------------------------------- /conf/exp/sft/reclor/llama2_7b_mixtral_dpo_sft_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/exp/sft/reclor/llama2_7b_mixtral_dpo_sft_v1_0.yaml -------------------------------------------------------------------------------- /conf/hydra/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/hydra/default.yaml -------------------------------------------------------------------------------- /conf/post_process/openai_react.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/post_process/openai_react.yaml -------------------------------------------------------------------------------- /conf/reader/logiqav2/react_service_0shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/reader/logiqav2/react_service_0shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/reader/logiqav2/react_service_1shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/reader/logiqav2/react_service_1shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/reader/reclor/react_service_0shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/reader/reclor/react_service_0shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/reader/reclor/react_service_1shot_v1_0.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/reader/reclor/react_service_1shot_v1_0.yaml -------------------------------------------------------------------------------- /conf/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/conf/test.yaml -------------------------------------------------------------------------------- /data/ar_lsat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/ar_lsat.py -------------------------------------------------------------------------------- /data/deepseek_math_utils/answer_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/deepseek_math_utils/answer_extraction.py -------------------------------------------------------------------------------- /data/deepseek_math_utils/eval_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/deepseek_math_utils/eval_script.py -------------------------------------------------------------------------------- /data/deepseek_math_utils/eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/deepseek_math_utils/eval_utils.py -------------------------------------------------------------------------------- /data/deepseek_math_utils/ocwcourses_eval_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/deepseek_math_utils/ocwcourses_eval_utils.py -------------------------------------------------------------------------------- /data/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/dpo.py -------------------------------------------------------------------------------- /data/folio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/folio.py -------------------------------------------------------------------------------- /data/general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/general.py -------------------------------------------------------------------------------- /data/input_aligner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/input_aligner.py -------------------------------------------------------------------------------- /data/logic_combine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/logic_combine.py -------------------------------------------------------------------------------- /data/logiqav2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/logiqav2.py -------------------------------------------------------------------------------- /data/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/math.py -------------------------------------------------------------------------------- /data/math_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/math_util.py -------------------------------------------------------------------------------- /data/meta_math/react_prompt_0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/meta_math/react_prompt_0.txt -------------------------------------------------------------------------------- /data/meta_math/react_prompt_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/meta_math/react_prompt_1.txt -------------------------------------------------------------------------------- /data/mini_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/mini_dataset.py -------------------------------------------------------------------------------- /data/openai_api_caller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/openai_api_caller.py -------------------------------------------------------------------------------- /data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/ar_lsat/react/train_200006_1-G_1_1.txt -------------------------------------------------------------------------------- /data/prompts/folio/react/train_1131.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/folio/react/train_1131.txt -------------------------------------------------------------------------------- /data/prompts/folio/react/train_1131_1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/folio/react/train_1131_1.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/compare_response/prompt_0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/compare_response/prompt_0.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/compare_response/template_th_0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/compare_response/template_th_0.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/decomposition/gpt4/dev_10741_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/decomposition/human/dev_218_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/decomposition/human/dev_218_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/decomposition/llama-2-70b-chat/dev_218_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/decomposition/llama-2-70b-chat/dev_218_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/gpt4/dev_218_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/gpt4/dev_218_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/gpt4/dev_218_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/gpt4/dev_218_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_218_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/human/dev_218_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_218_0_sim.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/human/dev_218_0_sim.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_7261_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/human/dev_7261_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/human/dev_7261_0_sim.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/human/dev_7261_0_sim.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/logic_form/prompt_0.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/logic_form/prompt_0.md -------------------------------------------------------------------------------- /data/prompts/logiqav2/modify/train_27.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/modify/train_27.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/modify/train_27_full.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/modify/train_27_full.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/react/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/react/prompts.py -------------------------------------------------------------------------------- /data/prompts/logiqav2/react/train_4554.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/react/train_4554.txt -------------------------------------------------------------------------------- /data/prompts/logiqav2/react/train_4554_p1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/prompts/logiqav2/react/train_4554_p1.txt -------------------------------------------------------------------------------- /data/reclor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/reclor.py -------------------------------------------------------------------------------- /data/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/data/vllm.py -------------------------------------------------------------------------------- /general_util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /general_util/average_meter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/average_meter.py -------------------------------------------------------------------------------- /general_util/dist_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/dist_utils.py -------------------------------------------------------------------------------- /general_util/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/evaluator.py -------------------------------------------------------------------------------- /general_util/fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/fsdp_utils.py -------------------------------------------------------------------------------- /general_util/lightseq_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/lightseq_utils.py -------------------------------------------------------------------------------- /general_util/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/logger.py -------------------------------------------------------------------------------- /general_util/mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/mixin.py -------------------------------------------------------------------------------- /general_util/tensorboard_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/tensorboard_helper.py -------------------------------------------------------------------------------- /general_util/tokenization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/tokenization_utils.py -------------------------------------------------------------------------------- /general_util/torch_fsdp_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/torch_fsdp_utils.py -------------------------------------------------------------------------------- /general_util/training_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/training_utils.py -------------------------------------------------------------------------------- /general_util/transformer_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/general_util/transformer_engine.py -------------------------------------------------------------------------------- /lora_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_merge.py -------------------------------------------------------------------------------- /lora_share_trainer/grpo_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/grpo_engine.py -------------------------------------------------------------------------------- /lora_share_trainer/lora_share_remax_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/lora_share_remax_engine.py -------------------------------------------------------------------------------- /lora_share_trainer/ppo_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/ppo_engine.py -------------------------------------------------------------------------------- /lora_share_trainer/remax_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/remax_trainer.py -------------------------------------------------------------------------------- /lora_share_trainer/utils/ds_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/utils/ds_utils.py -------------------------------------------------------------------------------- /lora_share_trainer/utils/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/utils/fp8.py -------------------------------------------------------------------------------- /lora_share_trainer/utils/post_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/utils/post_process.py -------------------------------------------------------------------------------- /lora_share_trainer/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/lora_share_trainer/utils/utils.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/main.py -------------------------------------------------------------------------------- /models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/gemma.py -------------------------------------------------------------------------------- /models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/llama.py -------------------------------------------------------------------------------- /models/llama_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/llama_tp.py -------------------------------------------------------------------------------- /models/megatron_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/megatron_llama.py -------------------------------------------------------------------------------- /models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/mistral.py -------------------------------------------------------------------------------- /models/phi.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/reward_model_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/reward_model_mixin.py -------------------------------------------------------------------------------- /models/string_rule_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/string_rule_reward.py -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/models/utils.py -------------------------------------------------------------------------------- /openai_api_caller_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/openai_api_caller_v1.py -------------------------------------------------------------------------------- /post_processors/dist_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/post_processors/dist_mixin.py -------------------------------------------------------------------------------- /post_processors/dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/post_processors/dpo.py -------------------------------------------------------------------------------- /post_processors/openai_api_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/post_processors/openai_api_callback.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/requirements.txt -------------------------------------------------------------------------------- /run_service.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/run_service.sh -------------------------------------------------------------------------------- /run_step_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/run_step_dpo.sh -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v1.0.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v1.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v1.1.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v1.2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v1.2.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v1.3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v1.3.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.1.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.2.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.2.1.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.2.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.2_pos_only.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.2_pos_only.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.2_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.2_topk.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.4.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.5.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.6.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v2.7.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v2.7.py -------------------------------------------------------------------------------- /scripts/best_of_filter_by_reward_v3.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/best_of_filter_by_reward_v3.0.py -------------------------------------------------------------------------------- /scripts/calculate_acc_w_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/calculate_acc_w_clean.py -------------------------------------------------------------------------------- /scripts/calculate_react_acc_w_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/calculate_react_acc_w_clean.py -------------------------------------------------------------------------------- /scripts/check_rewards_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/check_rewards_v1.0.py -------------------------------------------------------------------------------- /scripts/combine_reward_debug_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/combine_reward_debug_v1.0.py -------------------------------------------------------------------------------- /scripts/combine_worsen_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/combine_worsen_response.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_from_react_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_from_react_response.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_from_react_response_v1.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_from_react_response_v1.1.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_from_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_from_response.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_via_step_value_v1.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_via_step_value_v1.1.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_via_step_value_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_via_step_value_v1.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_via_step_value_v2.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_via_step_value_v2.0.py -------------------------------------------------------------------------------- /scripts/construct_dpo_data_via_worsen_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/construct_dpo_data_via_worsen_response.py -------------------------------------------------------------------------------- /scripts/cot/collect_rejection_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/collect_rejection_sampling.py -------------------------------------------------------------------------------- /scripts/cot/cot_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/cot_clean.py -------------------------------------------------------------------------------- /scripts/cot/cot_step_accumulate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/cot_step_accumulate.py -------------------------------------------------------------------------------- /scripts/cot/deepseek_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_clean.py -------------------------------------------------------------------------------- /scripts/cot/deepseek_cot_sample_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_cot_sample_steps.py -------------------------------------------------------------------------------- /scripts/cot/deepseek_dpo_pair_by_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_dpo_pair_by_reward.sh -------------------------------------------------------------------------------- /scripts/cot/deepseek_dpo_pair_by_reward_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_dpo_pair_by_reward_v1.0.py -------------------------------------------------------------------------------- /scripts/cot/deepseek_dpo_pair_by_reward_v4.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_dpo_pair_by_reward_v4.0.sh -------------------------------------------------------------------------------- /scripts/cot/deepseek_dpo_pair_by_reward_v5.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/deepseek_dpo_pair_by_reward_v5.0.sh -------------------------------------------------------------------------------- /scripts/cot/dpo_pair_by_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/dpo_pair_by_reward.sh -------------------------------------------------------------------------------- /scripts/cot/dpo_pair_by_reward_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/dpo_pair_by_reward_v1.0.py -------------------------------------------------------------------------------- /scripts/cot/meta_math_dpo_pair_by_reward_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/meta_math_dpo_pair_by_reward_v1.0.py -------------------------------------------------------------------------------- /scripts/cot/rap_clean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/rap_clean.py -------------------------------------------------------------------------------- /scripts/cot/rap_fix_pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/rap_fix_pred.py -------------------------------------------------------------------------------- /scripts/cot/rap_step_accumulate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/cot/rap_step_accumulate.py -------------------------------------------------------------------------------- /scripts/cot/step_contrastive.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/deepspeed/ds_full_checkpoint2hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/deepspeed/ds_full_checkpoint2hf.py -------------------------------------------------------------------------------- /scripts/explore_from_inter/best_of_filter_full.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/best_of_filter_full.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/combine_worsen_response.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/combine_worsen_response.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/reclor/best_of_filter_full.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/reclor/best_of_filter_full.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/reject_sampling_best_of.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/reject_sampling_best_of.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_llama_sft_v2.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/run_llama_sft_v2.0.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_llama_v2.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/run_llama_v2.0.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_llama_v2.0_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/run_llama_v2.0_reward.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_llama_v3.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/run_llama_v3.0.sh -------------------------------------------------------------------------------- /scripts/explore_from_inter/run_v2.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/explore_from_inter/run_v2.0.sh -------------------------------------------------------------------------------- /scripts/filter_dpo_pair_by_predict_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/filter_dpo_pair_by_predict_reward.py -------------------------------------------------------------------------------- /scripts/filter_dpo_pair_by_predict_reward_v2.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/filter_dpo_pair_by_predict_reward_v2.0.py -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_iter1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_iter1.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_v2.7.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_v2.7.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_v3.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/best_of_filter_full_v3.0.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/check_rewards.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/check_rewards.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/construct_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/construct_dpo.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/reward_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/reward_eval.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/logiqav2/split_pair.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/logiqav2/split_pair.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/readme.md -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/reclor/best_of_filter_full.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/reclor/best_of_filter_full.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/reclor/best_of_filter_gpt35.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/reclor/best_of_filter_gpt35.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/reclor/best_of_filter_logiqav2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/reclor/best_of_filter_logiqav2.sh -------------------------------------------------------------------------------- /scripts/fixed_explore_from_infer/reclor/construct_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/fixed_explore_from_infer/reclor/construct_dpo.sh -------------------------------------------------------------------------------- /scripts/inference/calibra_results.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/calibra_results.sh -------------------------------------------------------------------------------- /scripts/inference/calibra_results_logiqav2_sc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/calibra_results_logiqav2_sc.sh -------------------------------------------------------------------------------- /scripts/inference/calibra_results_reclor_sc.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/calibra_results_reclor_sc.sh -------------------------------------------------------------------------------- /scripts/inference/calibra_results_reclor_v1.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/calibra_results_reclor_v1.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_ar_lsat_vllm_v2.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_ar_lsat_vllm_v2.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_folio_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_folio_vllm.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_gsm8k_vllm_v1.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_gsm8k_vllm_v1.0.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_gsm8k_vllm_v1.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_gsm8k_vllm_v1.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_dev_order.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_dev_order.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_order.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_order.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_vllm.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_reclor.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_vllm_reclor.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_reclor_v1.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_vllm_reclor_v1.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_sc_v2.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_vllm_sc_v2.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_logiqav2_vllm_v2.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_logiqav2_vllm_v2.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_math_vllm_v1.0.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_math_vllm_v1.0.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_math_vllm_v1.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_math_vllm_v1.1.sh -------------------------------------------------------------------------------- /scripts/inference/run_query_reclor_vllm_sc_v1.1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/inference/run_query_reclor_vllm_sc_v1.1.sh -------------------------------------------------------------------------------- /scripts/line-scale.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/line-scale.py -------------------------------------------------------------------------------- /scripts/merge_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/merge_response.py -------------------------------------------------------------------------------- /scripts/model_converts/llama_hf_mp_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/model_converts/llama_hf_mp_split.py -------------------------------------------------------------------------------- /scripts/process_inter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter.sh -------------------------------------------------------------------------------- /scripts/process_inter_response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.0.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.1.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.1_by_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.1_by_reward.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.2.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.3.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.3_by_rewards.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.3_by_rewards.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.4.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.5.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v2.6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v2.6.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v3.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v3.0.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v3.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v3.1.py -------------------------------------------------------------------------------- /scripts/process_inter_response_v3.2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_inter_response_v3.2.py -------------------------------------------------------------------------------- /scripts/process_llama.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_llama.sh -------------------------------------------------------------------------------- /scripts/process_react_nodes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_react_nodes.py -------------------------------------------------------------------------------- /scripts/process_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_sft.sh -------------------------------------------------------------------------------- /scripts/process_sft_inter.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_sft_inter.sh -------------------------------------------------------------------------------- /scripts/process_sft_v2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_sft_v2.sh -------------------------------------------------------------------------------- /scripts/process_turbo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/process_turbo.sh -------------------------------------------------------------------------------- /scripts/react_step_union_find.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/react_step_union_find.py -------------------------------------------------------------------------------- /scripts/react_step_union_find_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/react_step_union_find_v2.py -------------------------------------------------------------------------------- /scripts/reject_sample_best_of_filter_by_reward_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/reject_sample_best_of_filter_by_reward_v1.0.py -------------------------------------------------------------------------------- /scripts/sample_react_inter_states.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sample_react_inter_states.py -------------------------------------------------------------------------------- /scripts/sample_react_inter_states_v2.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sample_react_inter_states_v2.0.py -------------------------------------------------------------------------------- /scripts/sample_react_inter_states_v2.1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sample_react_inter_states_v2.1.py -------------------------------------------------------------------------------- /scripts/sample_react_inter_states_v2.2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sample_react_inter_states_v2.2.py -------------------------------------------------------------------------------- /scripts/sample_react_inter_states_v2.3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sample_react_inter_states_v2.3.py -------------------------------------------------------------------------------- /scripts/sent_tf_react_step_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/sent_tf_react_step_encoding.py -------------------------------------------------------------------------------- /scripts/split_pairs_according_to_ids.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/split_pairs_according_to_ids.py -------------------------------------------------------------------------------- /scripts/split_response_train_dev_according2item_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/split_response_train_dev_according2item_id.py -------------------------------------------------------------------------------- /scripts/split_train_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/split_train_dev.py -------------------------------------------------------------------------------- /scripts/win_rate_cmp_v1.0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/scripts/win_rate_cmp_v1.0.py -------------------------------------------------------------------------------- /service_api_caller_v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/service_api_caller_v1.py -------------------------------------------------------------------------------- /service_api_caller_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/service_api_caller_v2.py -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/test.py -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/test.sh -------------------------------------------------------------------------------- /test_deepspeed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/test_deepspeed.py -------------------------------------------------------------------------------- /trainer_base_ds_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/trainer_base_ds_mul.py -------------------------------------------------------------------------------- /trainer_base_ds_mul_fs_tp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/trainer_base_ds_mul_fs_tp.py -------------------------------------------------------------------------------- /trainer_base_ds_mul_te.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/trainer_base_ds_mul_te.py -------------------------------------------------------------------------------- /vllm_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/vllm_inference.py -------------------------------------------------------------------------------- /write_deployment_to_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SparkJiao/dpo-trajectory-reasoning/HEAD/write_deployment_to_cache.py --------------------------------------------------------------------------------