├── .DS_Store ├── README.md ├── analysis_checkpoint_on_dev.py ├── assets └── .gitkeep ├── data ├── .DS_Store ├── data_repo │ ├── .DS_Store │ └── eval_set │ │ └── aime83_23_eval_set_data.jsonl ├── evaluation_dataset │ ├── AI-MO │ │ └── aimo-validation-aime │ │ │ ├── .gitattributes │ │ │ ├── README.md │ │ │ └── data │ │ │ └── train-00000-of-00001.parquet │ ├── aimo-validation-amc │ │ ├── .gitattributes │ │ ├── README.md │ │ └── data │ │ │ └── train-00000-of-00001.parquet │ ├── minerva-math │ │ ├── .gitattributes │ │ ├── README.md │ │ └── test.jsonl │ ├── openai │ │ └── gsm8k │ │ │ ├── .gitattributes │ │ │ ├── README.md │ │ │ ├── main │ │ │ ├── test-00000-of-00001.parquet │ │ │ └── train-00000-of-00001.parquet │ │ │ └── socratic │ │ │ ├── test-00000-of-00001.parquet │ │ │ └── train-00000-of-00001.parquet │ └── qq8933 │ │ └── MATH500 │ │ ├── .gitattributes │ │ ├── README.md │ │ └── test.jsonl └── system_1_2_config │ ├── cot_domain_data_info_14b_system_1_vs_1_init.json │ └── cot_domain_data_info_7b_system_1_vs_1_init.json ├── devset_cache ├── DeepSeek-R1-Distill-Qwen-14B-512-16000.json ├── DeepSeek-R1-Distill-Qwen-7B-512-16000.json ├── Qwen2.5-14B-Instruct-512-16000.json └── Qwen2.5-7B-Instruct-512-16000.json ├── eval_script ├── aba_eval_7b_mixture │ └── eval_7b_aba_static_mixture.sh ├── aba_source_experiment │ ├── eval_checkpoint_aba_easy_sys_2.sh │ ├── eval_checkpoint_aba_hard_sys_1.sh │ ├── eval_checkpoint_aba_medium_sys_1.sh │ └── eval_checkpoint_aba_medium_sys_2.sh ├── eval_advanced_merging │ ├── eval_task_ari_merging.sh │ ├── eval_ties_dare_merging.sh │ └── eval_ties_merging.sh ├── eval_prompt_method │ ├── concisecot_prompt │ │ ├── eval_Deepseek_R1_Distill_Qwen_14B_full_concisecot_prompt.sh │ │ └── eval_Deepseek_R1_Distill_Qwen_7B_full_concisecot_prompt.sh │ └── tale_ep_prompt │ │ ├── eval_Deepseek_R1_Distill_Qwen_14B_full_tale_ep_prompt.sh │ │ └── eval_Deepseek_R1_Distill_Qwen_7B_full_tale_ep_prompt.sh ├── eval_rl_baseline │ ├── eval_simpo │ │ ├── eval_Qwen_14B_R1_simpo_shortest_1_Epoch.sh │ │ └── eval_Qwen_7B_R1_simpo_shortest_1_Epoch.sh │ └── eval_thinkpruner │ │ ├── eval_Qwen_14B_ThinkPruner.sh │ │ └── eval_Qwen_7B_ThinkPruner.sh ├── eval_sft_baseline │ ├── eval_Qwen_14B_R1_CoT_Valve.sh │ └── eval_Qwen_7B_R1_CoT_Valve_1_Epoch.sh └── eval_tldr_weight.sh ├── merge_lora.py ├── pyproject.toml ├── reason_eval_log ├── DS-R1-14B-TLDR │ └── 05_17_19_32 │ │ ├── aime │ │ ├── checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── amc23 │ │ ├── checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── gsm8k │ │ ├── checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ │ └── checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ │ └── math500 │ │ ├── checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ └── checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt ├── DS-R1-7B-TLDR │ └── 05_14_17_10 │ │ ├── aime │ │ ├── checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── amc23 │ │ ├── checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── gsm8k │ │ ├── checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ │ └── checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ │ └── math500 │ │ ├── checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ └── checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt ├── result-merged-based │ ├── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B │ │ ├── aime │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── amc23 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── gsm8k │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ └── math500 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B │ │ ├── aime │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── amc23 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── gsm8k │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ └── math500 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B │ │ ├── aime │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── amc23 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── gsm8k │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ └── math500 │ │ │ ├── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B │ │ ├── aime │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── amc23 │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── gsm8k │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ └── math500 │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B │ │ ├── aime │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── amc23 │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ ├── gsm8k │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ └── math500 │ │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B │ │ ├── aime │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── amc23 │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── gsm8k │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ │ └── math500 │ │ ├── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ └── DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt ├── result-rl-baseline │ ├── rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch │ │ └── 05_20_06_12 │ │ │ ├── aime │ │ │ ├── Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ │ ├── amc23 │ │ │ ├── Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ │ ├── gsm8k │ │ │ ├── Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ │ └── math500 │ │ │ ├── Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch │ │ └── 05_20_06_02 │ │ │ ├── aime │ │ │ ├── ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ │ ├── amc23 │ │ │ ├── ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ │ ├── gsm8k │ │ │ ├── ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ │ └── ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ │ │ └── math500 │ │ │ ├── ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── metrics │ │ │ └── ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ └── output_token.txt │ ├── rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch │ │ └── 04_26_15_51 │ │ │ ├── amc23 │ │ │ ├── dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ ├── metrics │ │ │ │ └── dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ │ └── output_token.txt │ │ │ └── math500 │ │ │ ├── metrics │ │ │ └── qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ │ ├── output_token.txt │ │ │ └── qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ └── rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch │ │ └── 05_20_06_03 │ │ ├── aime │ │ ├── ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── amc23 │ │ ├── ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── metrics │ │ │ └── ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── gsm8k │ │ ├── ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ │ └── ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ │ └── math500 │ │ ├── ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ ├── metrics │ │ └── ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt └── result-sft-baseline │ ├── DeepSeek-R1-Distill-Qwen-7B-aba-mixture │ └── 05_17_09_03 │ │ ├── aime │ │ ├── metrics │ │ │ └── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── amc23 │ │ ├── metrics │ │ │ └── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ ├── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ │ └── output_token.txt │ │ ├── gsm8k │ │ ├── metrics │ │ │ └── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ ├── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ │ └── math500 │ │ ├── metrics │ │ └── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ ├── new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ │ └── output_token.txt │ └── sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch │ └── 05_19_12_02 │ ├── aime │ ├── 05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ ├── metrics │ │ └── 05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ └── output_token.txt │ ├── amc23 │ ├── 05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ ├── metrics │ │ └── 05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json │ └── output_token.txt │ ├── gsm8k │ ├── 05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ ├── metrics │ │ └── 05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json │ └── output_token.txt │ └── math500 │ ├── 05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ ├── metrics │ └── 05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json │ └── output_token.txt ├── requirement.txt ├── scripts ├── __init__.py ├── combine_data.py ├── convert_format.py ├── convert_to_data.py ├── label_math_difficulty.py ├── prompts.py ├── qwen_eval_bon.py ├── response_rewrite.py └── upload_hub.py ├── setup.py ├── skythought ├── __init__.py ├── skythought_evals │ ├── __init__.py │ ├── batch │ │ ├── __init__.py │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── initializer.py │ │ │ └── vllm_engine.py │ │ ├── env_config.py │ │ ├── logging │ │ │ └── __init__.py │ │ ├── pipeline.py │ │ ├── tokenizer.py │ │ ├── utils.py │ │ └── workload.py │ ├── eval.py │ ├── inference_and_check.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── model_configs.yaml │ │ └── system_prompts │ │ │ └── prime.txt │ ├── ray_configs │ │ └── ray_config.yaml │ ├── requirements.txt │ ├── tasks │ │ ├── __init__.py │ │ ├── aime │ │ │ ├── aime24.yaml │ │ │ ├── aime24_sky.yaml │ │ │ ├── aime25.yaml │ │ │ └── aime_handler.py │ │ ├── amc23 │ │ │ ├── amc23.yaml │ │ │ └── amc23_handler.py │ │ ├── apps │ │ │ ├── apps.yaml │ │ │ ├── apps_handler.py │ │ │ └── apps_util.py │ │ ├── arc │ │ │ ├── arc_c.yaml │ │ │ └── arc_handler.py │ │ ├── base.py │ │ ├── gsm8k │ │ │ ├── gsm8k.yaml │ │ │ └── gsm8k_handler.py │ │ ├── math │ │ │ ├── math500.yaml │ │ │ └── math_handler.py │ │ ├── minervamath │ │ │ ├── minervamath.yaml │ │ │ └── minervamath_handler.py │ │ └── task_util.py │ └── util │ │ ├── __init__.py │ │ ├── common.py │ │ ├── math_parsing_util.py │ │ ├── metrics.py │ │ └── response.py └── tools │ ├── README.md │ ├── guided_search │ ├── mcts.py │ └── rstar_mcts.py │ └── prm │ └── models │ ├── ms_prm.py │ └── qwen_prm.py ├── src ├── accelerate_config │ ├── .ipynb_checkpoints │ │ ├── deepspeed_zero2-checkpoint.yaml │ │ └── deepspeed_zero3-checkpoint.yaml │ ├── deepspeed_zero2.yaml │ └── deepspeed_zero3.yaml ├── config.yaml ├── config │ ├── .ipynb_checkpoints │ │ ├── zero1_config-checkpoint.json │ │ ├── zero1_config_no_optimizer-checkpoint.json │ │ ├── zero1_config_offload_optimizer-checkpoint.json │ │ ├── zero1_config_opt-checkpoint.json │ │ ├── zero2_config-checkpoint.json │ │ ├── zero2_config_accelerate-checkpoint.json │ │ ├── zero2_config_offload_optimizer-checkpoint.json │ │ ├── zero3_config-checkpoint.json │ │ ├── zero3_config_offload_all-checkpoint.json │ │ ├── zero3_config_offload_optimizer-checkpoint.json │ │ └── zero3_config_offload_param-checkpoint.json │ ├── accelerate_config │ │ ├── .ipynb_checkpoints │ │ │ ├── deepspeed_zero2-checkpoint.yaml │ │ │ └── deepspeed_zero3-checkpoint.yaml │ │ ├── deepspeed_zero2.yaml │ │ └── deepspeed_zero3.yaml │ ├── zero1_config.json │ ├── zero1_config_no_optimizer.json │ ├── zero1_config_offload_optimizer.json │ ├── zero1_config_opt.json │ ├── zero2_config.json │ ├── zero2_config_accelerate.json │ ├── zero2_config_offload_optimizer.json │ ├── zero3_config.json │ ├── zero3_config_no_offload.json │ ├── zero3_config_offload_all.json │ ├── zero3_config_offload_optimizer.json │ └── zero3_config_offload_param.json ├── embed_server.py ├── eval.py ├── math_util.py ├── peft │ ├── __init__.py │ ├── import_utils.py │ ├── mapping.py │ ├── peft │ │ ├── __init__.py │ │ ├── mapping.py │ │ ├── peft_model.py │ │ ├── tuners │ │ │ ├── __init__.py │ │ │ ├── lora.py │ │ │ ├── p_tuning.py │ │ │ ├── prefix_tuning.py │ │ │ └── prompt_tuning.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── other.py │ │ │ └── save_and_load.py │ ├── peft_model.py │ ├── shared.py │ ├── tuners │ │ ├── __init__.py │ │ ├── adalora.py │ │ ├── adaption_prompt.py │ │ ├── lora.py │ │ ├── mmoelora.py │ │ ├── mmoeloraS.py │ │ ├── p_tuning.py │ │ ├── prefix_tuning.py │ │ └── prompt_tuning.py │ └── utils │ │ ├── __init__.py │ │ ├── adapters_utils.py │ │ ├── config.py │ │ ├── other.py │ │ └── save_and_load.py ├── train.py ├── train_dynamic_lora │ ├── cache_short_long_cot_model.py │ ├── callbacks.py │ ├── cot_domain.py │ ├── cot_domain_with_filter.py │ ├── data_utils.py │ ├── dynamic_callback.py │ ├── extras │ │ ├── __init__.py │ │ ├── import_utils.py │ │ ├── profiling.py │ │ ├── vllm_client.py │ │ └── vllm_client_new.py │ ├── grpo_config.py │ ├── grpo_trainer.py │ ├── grpo_trainer_utils.py │ ├── models │ │ ├── __init__.py │ │ ├── auxiliary_modules.py │ │ ├── modeling_base.py │ │ ├── modeling_sd_base.py │ │ ├── modeling_value_head.py │ │ ├── sd_utils.py │ │ └── utils.py │ ├── train_stepdpo_sft_lora_doremi_dynamic_sampling_paralley.py │ ├── trainer_cp.py │ └── trainer_dynamic.py ├── train_lora │ ├── train_stepdpo_sft_lora.py │ └── trainer_cp.py └── util.py ├── token_usage_script.py ├── train_script ├── aba_data_source_construct │ ├── aba_sys_1 │ │ ├── aba_sys_1_hard │ │ │ └── aba_sys_1_hard.sh │ │ └── aba_sys_1_medium │ │ │ └── aba_sys_1_medium.sh │ └── aba_sys_2 │ │ ├── aba_sys_2_easy │ │ └── aba_sys_2_easy.sh │ │ └── aba_sys_2_medium │ │ └── aba_sys_2_medium.sh ├── aba_mixture │ └── max_step_system_1_2_data_1_vs_1.sh ├── train_14B_1_vs_1 │ ├── 14b_parameter_serve.sh │ └── max_step_2000_eval_step_32_init_1_vs_1.sh └── train_7B_1_vs_1 │ ├── 7b_parameter_serve.sh │ └── max_step_2000_eval_step_32_init_1_vs_1.sh ├── vllm_log └── serve_log.txt └── zero_2_fp32.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/README.md -------------------------------------------------------------------------------- /analysis_checkpoint_on_dev.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/analysis_checkpoint_on_dev.py -------------------------------------------------------------------------------- /assets/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/.DS_Store -------------------------------------------------------------------------------- /data/data_repo/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/data_repo/.DS_Store -------------------------------------------------------------------------------- /data/data_repo/eval_set/aime83_23_eval_set_data.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/data_repo/eval_set/aime83_23_eval_set_data.jsonl -------------------------------------------------------------------------------- /data/evaluation_dataset/AI-MO/aimo-validation-aime/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/AI-MO/aimo-validation-aime/.gitattributes -------------------------------------------------------------------------------- /data/evaluation_dataset/AI-MO/aimo-validation-aime/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/AI-MO/aimo-validation-aime/README.md -------------------------------------------------------------------------------- /data/evaluation_dataset/AI-MO/aimo-validation-aime/data/train-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/AI-MO/aimo-validation-aime/data/train-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/aimo-validation-amc/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/aimo-validation-amc/.gitattributes -------------------------------------------------------------------------------- /data/evaluation_dataset/aimo-validation-amc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/aimo-validation-amc/README.md -------------------------------------------------------------------------------- /data/evaluation_dataset/aimo-validation-amc/data/train-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/aimo-validation-amc/data/train-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/minerva-math/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/minerva-math/.gitattributes -------------------------------------------------------------------------------- /data/evaluation_dataset/minerva-math/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | license: mit 3 | --- 4 | -------------------------------------------------------------------------------- /data/evaluation_dataset/minerva-math/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/minerva-math/test.jsonl -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/.gitattributes -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/README.md -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/main/test-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/main/test-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/main/train-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/main/train-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/socratic/test-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/socratic/test-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/openai/gsm8k/socratic/train-00000-of-00001.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/openai/gsm8k/socratic/train-00000-of-00001.parquet -------------------------------------------------------------------------------- /data/evaluation_dataset/qq8933/MATH500/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/qq8933/MATH500/.gitattributes -------------------------------------------------------------------------------- /data/evaluation_dataset/qq8933/MATH500/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/qq8933/MATH500/README.md -------------------------------------------------------------------------------- /data/evaluation_dataset/qq8933/MATH500/test.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/evaluation_dataset/qq8933/MATH500/test.jsonl -------------------------------------------------------------------------------- /data/system_1_2_config/cot_domain_data_info_14b_system_1_vs_1_init.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/system_1_2_config/cot_domain_data_info_14b_system_1_vs_1_init.json -------------------------------------------------------------------------------- /data/system_1_2_config/cot_domain_data_info_7b_system_1_vs_1_init.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/data/system_1_2_config/cot_domain_data_info_7b_system_1_vs_1_init.json -------------------------------------------------------------------------------- /devset_cache/DeepSeek-R1-Distill-Qwen-14B-512-16000.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/devset_cache/DeepSeek-R1-Distill-Qwen-14B-512-16000.json -------------------------------------------------------------------------------- /devset_cache/DeepSeek-R1-Distill-Qwen-7B-512-16000.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/devset_cache/DeepSeek-R1-Distill-Qwen-7B-512-16000.json -------------------------------------------------------------------------------- /devset_cache/Qwen2.5-14B-Instruct-512-16000.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/devset_cache/Qwen2.5-14B-Instruct-512-16000.json -------------------------------------------------------------------------------- /devset_cache/Qwen2.5-7B-Instruct-512-16000.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/devset_cache/Qwen2.5-7B-Instruct-512-16000.json -------------------------------------------------------------------------------- /eval_script/aba_eval_7b_mixture/eval_7b_aba_static_mixture.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/aba_eval_7b_mixture/eval_7b_aba_static_mixture.sh -------------------------------------------------------------------------------- /eval_script/aba_source_experiment/eval_checkpoint_aba_easy_sys_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/aba_source_experiment/eval_checkpoint_aba_easy_sys_2.sh -------------------------------------------------------------------------------- /eval_script/aba_source_experiment/eval_checkpoint_aba_hard_sys_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/aba_source_experiment/eval_checkpoint_aba_hard_sys_1.sh -------------------------------------------------------------------------------- /eval_script/aba_source_experiment/eval_checkpoint_aba_medium_sys_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/aba_source_experiment/eval_checkpoint_aba_medium_sys_1.sh -------------------------------------------------------------------------------- /eval_script/aba_source_experiment/eval_checkpoint_aba_medium_sys_2.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/aba_source_experiment/eval_checkpoint_aba_medium_sys_2.sh -------------------------------------------------------------------------------- /eval_script/eval_advanced_merging/eval_task_ari_merging.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_advanced_merging/eval_task_ari_merging.sh -------------------------------------------------------------------------------- /eval_script/eval_advanced_merging/eval_ties_dare_merging.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_advanced_merging/eval_ties_dare_merging.sh -------------------------------------------------------------------------------- /eval_script/eval_advanced_merging/eval_ties_merging.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_advanced_merging/eval_ties_merging.sh -------------------------------------------------------------------------------- /eval_script/eval_prompt_method/concisecot_prompt/eval_Deepseek_R1_Distill_Qwen_14B_full_concisecot_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_prompt_method/concisecot_prompt/eval_Deepseek_R1_Distill_Qwen_14B_full_concisecot_prompt.sh -------------------------------------------------------------------------------- /eval_script/eval_prompt_method/concisecot_prompt/eval_Deepseek_R1_Distill_Qwen_7B_full_concisecot_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_prompt_method/concisecot_prompt/eval_Deepseek_R1_Distill_Qwen_7B_full_concisecot_prompt.sh -------------------------------------------------------------------------------- /eval_script/eval_prompt_method/tale_ep_prompt/eval_Deepseek_R1_Distill_Qwen_14B_full_tale_ep_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_prompt_method/tale_ep_prompt/eval_Deepseek_R1_Distill_Qwen_14B_full_tale_ep_prompt.sh -------------------------------------------------------------------------------- /eval_script/eval_prompt_method/tale_ep_prompt/eval_Deepseek_R1_Distill_Qwen_7B_full_tale_ep_prompt.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_prompt_method/tale_ep_prompt/eval_Deepseek_R1_Distill_Qwen_7B_full_tale_ep_prompt.sh -------------------------------------------------------------------------------- /eval_script/eval_rl_baseline/eval_simpo/eval_Qwen_14B_R1_simpo_shortest_1_Epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_rl_baseline/eval_simpo/eval_Qwen_14B_R1_simpo_shortest_1_Epoch.sh -------------------------------------------------------------------------------- /eval_script/eval_rl_baseline/eval_simpo/eval_Qwen_7B_R1_simpo_shortest_1_Epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_rl_baseline/eval_simpo/eval_Qwen_7B_R1_simpo_shortest_1_Epoch.sh -------------------------------------------------------------------------------- /eval_script/eval_rl_baseline/eval_thinkpruner/eval_Qwen_14B_ThinkPruner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_rl_baseline/eval_thinkpruner/eval_Qwen_14B_ThinkPruner.sh -------------------------------------------------------------------------------- /eval_script/eval_rl_baseline/eval_thinkpruner/eval_Qwen_7B_ThinkPruner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_rl_baseline/eval_thinkpruner/eval_Qwen_7B_ThinkPruner.sh -------------------------------------------------------------------------------- /eval_script/eval_sft_baseline/eval_Qwen_14B_R1_CoT_Valve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_sft_baseline/eval_Qwen_14B_R1_CoT_Valve.sh -------------------------------------------------------------------------------- /eval_script/eval_sft_baseline/eval_Qwen_7B_R1_CoT_Valve_1_Epoch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_sft_baseline/eval_Qwen_7B_R1_CoT_Valve_1_Epoch.sh -------------------------------------------------------------------------------- /eval_script/eval_tldr_weight.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/eval_script/eval_tldr_weight.sh -------------------------------------------------------------------------------- /merge_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/merge_lora.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/pyproject.toml -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/metrics/checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/metrics/checkpoint-128_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/metrics/checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/metrics/checkpoint-128_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/metrics/checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/metrics/checkpoint-128_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/metrics/checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/metrics/checkpoint-128_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-14B-TLDR/05_17_19_32/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/metrics/checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/metrics/checkpoint-256_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/metrics/checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/metrics/checkpoint-256_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/metrics/checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/metrics/checkpoint-256_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/metrics/checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/metrics/checkpoint-256_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/DS-R1-7B-TLDR/05_14_17_10/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Task-Arithmetic-Qwen2.5-14B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Dare-Merged-Qwen2.5-14B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/metrics/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-14B-Ties-Merged-Qwen2.5-14B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Task-Arithmetic-Qwen2.5-Math-7B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Dare-Merged-Qwen2.5-Math-7B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/metrics/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-merged-based/DeepSeek-R1-Distill-Qwen-7B-Ties-Merged-Qwen2.5-Math-7B/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/metrics/Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/metrics/Overthinking-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/metrics/Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/metrics/Overthinking-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/metrics/Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/metrics/Overthinking-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/metrics/Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/metrics/Overthinking-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_SimPO_shortest_1_Epoch/05_20_06_12/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/metrics/ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/metrics/ThinkPrune-14B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/metrics/ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/metrics/ThinkPrune-14B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/metrics/ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/metrics/ThinkPrune-14B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/metrics/ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/metrics/ThinkPrune-14B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_14B_R1_ThinkerPruner_10_Epoch/05_20_06_02/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/metrics/dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/metrics/dpsk-7b-long-short-simpo-update_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/metrics/qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/metrics/qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_SimPO_shortest_1_Epoch/04_26_15_51/math500/qwq-32b-long-short-simpo_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/metrics/ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/metrics/ThinkPrune-7B_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/metrics/ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/metrics/ThinkPrune-7B_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/metrics/ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/metrics/ThinkPrune-7B_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/metrics/ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/metrics/ThinkPrune-7B_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-rl-baseline/rl_baseline_Qwen_7B_R1_ThinkerPruner_10_Epoch/05_20_06_03/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/metrics/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/new_gsm8k_data_1_vs_1_max_len_3200005_15_20_51_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/DeepSeek-R1-Distill-Qwen-7B-aba-mixture/05_17_09_03/math500/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/metrics/05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/metrics/05_19_12_02_aime24_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/aime/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/metrics/05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/metrics/05_19_12_02_amc23_train_subset_None_filter_False_s0_e-1_t0.7_n8.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/amc23/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/metrics/05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/metrics/05_19_12_02_gsm8k_test_subset_main_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/gsm8k/output_token.txt -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/metrics/05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/metrics/05_19_12_02_math500_test_subset_None_filter_False_s0_e-1_t0.7_n1.json -------------------------------------------------------------------------------- /reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/output_token.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/reason_eval_log/result-sft-baseline/sft_baseline_Qwen_R1_14B_R1_lora_2_CoT_Valve_5_Epoch/05_19_12_02/math500/output_token.txt -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/requirement.txt -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/combine_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/combine_data.py -------------------------------------------------------------------------------- /scripts/convert_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/convert_format.py -------------------------------------------------------------------------------- /scripts/convert_to_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/convert_to_data.py -------------------------------------------------------------------------------- /scripts/label_math_difficulty.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/label_math_difficulty.py -------------------------------------------------------------------------------- /scripts/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/prompts.py -------------------------------------------------------------------------------- /scripts/qwen_eval_bon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/qwen_eval_bon.py -------------------------------------------------------------------------------- /scripts/response_rewrite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/response_rewrite.py -------------------------------------------------------------------------------- /scripts/upload_hub.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/scripts/upload_hub.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/setup.py -------------------------------------------------------------------------------- /skythought/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /skythought/skythought_evals/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/__init__.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/engines/__init__.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/engines/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/engines/base.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/engines/initializer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/engines/initializer.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/engines/vllm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/engines/vllm_engine.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/env_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/env_config.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/logging/__init__.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/pipeline.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/tokenizer.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/utils.py -------------------------------------------------------------------------------- /skythought/skythought_evals/batch/workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/batch/workload.py -------------------------------------------------------------------------------- /skythought/skythought_evals/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/eval.py -------------------------------------------------------------------------------- /skythought/skythought_evals/inference_and_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/inference_and_check.py -------------------------------------------------------------------------------- /skythought/skythought_evals/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/models/__init__.py -------------------------------------------------------------------------------- /skythought/skythought_evals/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/models/base.py -------------------------------------------------------------------------------- /skythought/skythought_evals/models/model_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/models/model_configs.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/models/system_prompts/prime.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/models/system_prompts/prime.txt -------------------------------------------------------------------------------- /skythought/skythought_evals/ray_configs/ray_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/ray_configs/ray_config.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/requirements.txt -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/__init__.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/aime/aime24.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/aime/aime24.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/aime/aime24_sky.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/aime/aime24_sky.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/aime/aime25.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/aime/aime25.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/aime/aime_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/aime/aime_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/amc23/amc23.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/amc23/amc23.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/amc23/amc23_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/amc23/amc23_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/apps/apps.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/apps/apps.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/apps/apps_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/apps/apps_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/apps/apps_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/apps/apps_util.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/arc/arc_c.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/arc/arc_c.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/arc/arc_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/arc/arc_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/base.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/gsm8k/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/gsm8k/gsm8k.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/gsm8k/gsm8k_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/gsm8k/gsm8k_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/math/math500.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/math/math500.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/math/math_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/math/math_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/minervamath/minervamath.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/minervamath/minervamath.yaml -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/minervamath/minervamath_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/minervamath/minervamath_handler.py -------------------------------------------------------------------------------- /skythought/skythought_evals/tasks/task_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/tasks/task_util.py -------------------------------------------------------------------------------- /skythought/skythought_evals/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /skythought/skythought_evals/util/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/util/common.py -------------------------------------------------------------------------------- /skythought/skythought_evals/util/math_parsing_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/util/math_parsing_util.py -------------------------------------------------------------------------------- /skythought/skythought_evals/util/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/util/metrics.py -------------------------------------------------------------------------------- /skythought/skythought_evals/util/response.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/skythought_evals/util/response.py -------------------------------------------------------------------------------- /skythought/tools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/tools/README.md -------------------------------------------------------------------------------- /skythought/tools/guided_search/mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/tools/guided_search/mcts.py -------------------------------------------------------------------------------- /skythought/tools/guided_search/rstar_mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/tools/guided_search/rstar_mcts.py -------------------------------------------------------------------------------- /skythought/tools/prm/models/ms_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/tools/prm/models/ms_prm.py -------------------------------------------------------------------------------- /skythought/tools/prm/models/qwen_prm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/skythought/tools/prm/models/qwen_prm.py -------------------------------------------------------------------------------- /src/accelerate_config/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/accelerate_config/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml -------------------------------------------------------------------------------- /src/accelerate_config/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/accelerate_config/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml -------------------------------------------------------------------------------- /src/accelerate_config/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/accelerate_config/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /src/accelerate_config/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/accelerate_config/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /src/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config.yaml -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero1_config-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero1_config-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero1_config_no_optimizer-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero1_config_no_optimizer-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero1_config_offload_optimizer-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero1_config_offload_optimizer-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero1_config_opt-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero1_config_opt-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero2_config-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero2_config-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero2_config_accelerate-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero2_config_accelerate-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero2_config_offload_optimizer-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero2_config_offload_optimizer-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero3_config-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero3_config-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero3_config_offload_all-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero3_config_offload_all-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero3_config_offload_optimizer-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero3_config_offload_optimizer-checkpoint.json -------------------------------------------------------------------------------- /src/config/.ipynb_checkpoints/zero3_config_offload_param-checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/.ipynb_checkpoints/zero3_config_offload_param-checkpoint.json -------------------------------------------------------------------------------- /src/config/accelerate_config/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/accelerate_config/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml -------------------------------------------------------------------------------- /src/config/accelerate_config/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/accelerate_config/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml -------------------------------------------------------------------------------- /src/config/accelerate_config/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/accelerate_config/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /src/config/accelerate_config/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/accelerate_config/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /src/config/zero1_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero1_config.json -------------------------------------------------------------------------------- /src/config/zero1_config_no_optimizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero1_config_no_optimizer.json -------------------------------------------------------------------------------- /src/config/zero1_config_offload_optimizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero1_config_offload_optimizer.json -------------------------------------------------------------------------------- /src/config/zero1_config_opt.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero1_config_opt.json -------------------------------------------------------------------------------- /src/config/zero2_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero2_config.json -------------------------------------------------------------------------------- /src/config/zero2_config_accelerate.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero2_config_accelerate.json -------------------------------------------------------------------------------- /src/config/zero2_config_offload_optimizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero2_config_offload_optimizer.json -------------------------------------------------------------------------------- /src/config/zero3_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero3_config.json -------------------------------------------------------------------------------- /src/config/zero3_config_no_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero3_config_no_offload.json -------------------------------------------------------------------------------- /src/config/zero3_config_offload_all.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero3_config_offload_all.json -------------------------------------------------------------------------------- /src/config/zero3_config_offload_optimizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero3_config_offload_optimizer.json -------------------------------------------------------------------------------- /src/config/zero3_config_offload_param.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/config/zero3_config_offload_param.json -------------------------------------------------------------------------------- /src/embed_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/embed_server.py -------------------------------------------------------------------------------- /src/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/eval.py -------------------------------------------------------------------------------- /src/math_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/math_util.py -------------------------------------------------------------------------------- /src/peft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/__init__.py -------------------------------------------------------------------------------- /src/peft/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/import_utils.py -------------------------------------------------------------------------------- /src/peft/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/mapping.py -------------------------------------------------------------------------------- /src/peft/peft/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/__init__.py -------------------------------------------------------------------------------- /src/peft/peft/mapping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/mapping.py -------------------------------------------------------------------------------- /src/peft/peft/peft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/peft_model.py -------------------------------------------------------------------------------- /src/peft/peft/tuners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/tuners/__init__.py -------------------------------------------------------------------------------- /src/peft/peft/tuners/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/tuners/lora.py -------------------------------------------------------------------------------- /src/peft/peft/tuners/p_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/tuners/p_tuning.py -------------------------------------------------------------------------------- /src/peft/peft/tuners/prefix_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/tuners/prefix_tuning.py -------------------------------------------------------------------------------- /src/peft/peft/tuners/prompt_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/tuners/prompt_tuning.py -------------------------------------------------------------------------------- /src/peft/peft/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/utils/__init__.py -------------------------------------------------------------------------------- /src/peft/peft/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/utils/config.py -------------------------------------------------------------------------------- /src/peft/peft/utils/other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/utils/other.py -------------------------------------------------------------------------------- /src/peft/peft/utils/save_and_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft/utils/save_and_load.py -------------------------------------------------------------------------------- /src/peft/peft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/peft_model.py -------------------------------------------------------------------------------- /src/peft/shared.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/shared.py -------------------------------------------------------------------------------- /src/peft/tuners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/__init__.py -------------------------------------------------------------------------------- /src/peft/tuners/adalora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/adalora.py -------------------------------------------------------------------------------- /src/peft/tuners/adaption_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/adaption_prompt.py -------------------------------------------------------------------------------- /src/peft/tuners/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/lora.py -------------------------------------------------------------------------------- /src/peft/tuners/mmoelora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/mmoelora.py -------------------------------------------------------------------------------- /src/peft/tuners/mmoeloraS.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/mmoeloraS.py -------------------------------------------------------------------------------- /src/peft/tuners/p_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/p_tuning.py -------------------------------------------------------------------------------- /src/peft/tuners/prefix_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/prefix_tuning.py -------------------------------------------------------------------------------- /src/peft/tuners/prompt_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/tuners/prompt_tuning.py -------------------------------------------------------------------------------- /src/peft/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/utils/__init__.py -------------------------------------------------------------------------------- /src/peft/utils/adapters_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/utils/adapters_utils.py -------------------------------------------------------------------------------- /src/peft/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/utils/config.py -------------------------------------------------------------------------------- /src/peft/utils/other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/utils/other.py -------------------------------------------------------------------------------- /src/peft/utils/save_and_load.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/peft/utils/save_and_load.py -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/cache_short_long_cot_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/cache_short_long_cot_model.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/callbacks.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/cot_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/cot_domain.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/cot_domain_with_filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/cot_domain_with_filter.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/data_utils.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/dynamic_callback.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/dynamic_callback.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/extras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/train_dynamic_lora/extras/import_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/extras/import_utils.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/extras/profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/extras/profiling.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/extras/vllm_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/extras/vllm_client.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/extras/vllm_client_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/extras/vllm_client_new.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/grpo_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/grpo_config.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/grpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/grpo_trainer.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/grpo_trainer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/grpo_trainer_utils.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/__init__.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/auxiliary_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/auxiliary_modules.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/modeling_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/modeling_base.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/modeling_sd_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/modeling_sd_base.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/modeling_value_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/modeling_value_head.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/sd_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/sd_utils.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/models/utils.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/train_stepdpo_sft_lora_doremi_dynamic_sampling_paralley.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/train_stepdpo_sft_lora_doremi_dynamic_sampling_paralley.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/trainer_cp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/trainer_cp.py -------------------------------------------------------------------------------- /src/train_dynamic_lora/trainer_dynamic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_dynamic_lora/trainer_dynamic.py -------------------------------------------------------------------------------- /src/train_lora/train_stepdpo_sft_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_lora/train_stepdpo_sft_lora.py -------------------------------------------------------------------------------- /src/train_lora/trainer_cp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/train_lora/trainer_cp.py -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/src/util.py -------------------------------------------------------------------------------- /token_usage_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/token_usage_script.py -------------------------------------------------------------------------------- /train_script/aba_data_source_construct/aba_sys_1/aba_sys_1_hard/aba_sys_1_hard.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/aba_data_source_construct/aba_sys_1/aba_sys_1_hard/aba_sys_1_hard.sh -------------------------------------------------------------------------------- /train_script/aba_data_source_construct/aba_sys_1/aba_sys_1_medium/aba_sys_1_medium.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/aba_data_source_construct/aba_sys_1/aba_sys_1_medium/aba_sys_1_medium.sh -------------------------------------------------------------------------------- /train_script/aba_data_source_construct/aba_sys_2/aba_sys_2_easy/aba_sys_2_easy.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/aba_data_source_construct/aba_sys_2/aba_sys_2_easy/aba_sys_2_easy.sh -------------------------------------------------------------------------------- /train_script/aba_data_source_construct/aba_sys_2/aba_sys_2_medium/aba_sys_2_medium.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/aba_data_source_construct/aba_sys_2/aba_sys_2_medium/aba_sys_2_medium.sh -------------------------------------------------------------------------------- /train_script/aba_mixture/max_step_system_1_2_data_1_vs_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/aba_mixture/max_step_system_1_2_data_1_vs_1.sh -------------------------------------------------------------------------------- /train_script/train_14B_1_vs_1/14b_parameter_serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/train_14B_1_vs_1/14b_parameter_serve.sh -------------------------------------------------------------------------------- /train_script/train_14B_1_vs_1/max_step_2000_eval_step_32_init_1_vs_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/train_14B_1_vs_1/max_step_2000_eval_step_32_init_1_vs_1.sh -------------------------------------------------------------------------------- /train_script/train_7B_1_vs_1/7b_parameter_serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/train_7B_1_vs_1/7b_parameter_serve.sh -------------------------------------------------------------------------------- /train_script/train_7B_1_vs_1/max_step_2000_eval_step_32_init_1_vs_1.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/train_script/train_7B_1_vs_1/max_step_2000_eval_step_32_init_1_vs_1.sh -------------------------------------------------------------------------------- /vllm_log/serve_log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/vllm_log/serve_log.txt -------------------------------------------------------------------------------- /zero_2_fp32.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zzli2022/TLDR/HEAD/zero_2_fp32.py --------------------------------------------------------------------------------