├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── assets ├── .DS_Store ├── Performace_and_speed.svg ├── SDAR_doc_head.png ├── SDAR_logo.png ├── logo.jpg ├── table1.png ├── table2.png ├── table2_1.png ├── table3.png └── wechat.jpg ├── evaluation ├── README.md ├── environment.yml └── opencompass │ ├── .gitignore │ ├── .owners.yml │ ├── .pre-commit-config-zh-cn.yaml │ ├── .pre-commit-config.yaml │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── README_zh-CN.md │ ├── configs │ └── eval_sdar.py │ ├── dataset-index.yml │ ├── docs │ ├── en │ │ ├── .readthedocs.yaml │ │ ├── Makefile │ │ ├── _static │ │ │ ├── css │ │ │ │ └── readthedocs.css │ │ │ ├── image │ │ │ │ ├── logo.svg │ │ │ │ └── logo_icon.svg │ │ │ └── js │ │ │ │ └── custom.js │ │ ├── _templates │ │ │ ├── 404.html │ │ │ ├── autosummary │ │ │ │ └── class.rst │ │ │ └── callable.rst │ │ ├── advanced_guides │ │ │ ├── accelerator_intro.md │ │ │ ├── circular_eval.md │ │ │ ├── code_eval.md │ │ │ ├── code_eval_service.md │ │ │ ├── contamination_eval.md │ │ │ ├── custom_dataset.md │ │ │ ├── evaluation_lightllm.md │ │ │ ├── evaluation_lmdeploy.md │ │ │ ├── llm_judge.md │ │ │ ├── longeval.md │ │ │ ├── math_verify.md │ │ │ ├── needleinahaystack_eval.md │ │ │ ├── new_dataset.md │ │ │ ├── new_model.md │ │ │ ├── objective_judgelm_evaluation.md │ │ │ ├── persistence.md │ │ │ ├── prompt_attack.md │ │ │ └── subjective_evaluation.md │ │ ├── conf.py │ │ ├── docutils.conf │ │ ├── get_started │ │ │ ├── faq.md │ │ │ ├── installation.md │ │ │ └── quick_start.md │ │ ├── index.rst │ │ ├── notes │ │ │ ├── academic.md │ │ │ ├── contribution_guide.md │ │ │ └── news.md │ │ ├── prompt │ │ │ ├── chain_of_thought.md │ │ │ ├── meta_template.md │ │ │ ├── overview.md │ │ │ └── prompt_template.md │ │ ├── statis.py │ │ ├── tools.md │ │ └── user_guides │ │ │ ├── config.md │ │ │ ├── corebench.md │ │ │ ├── datasets.md │ │ │ ├── deepseek_r1.md │ │ │ ├── evaluation.md │ │ │ ├── experimentation.md │ │ │ ├── framework_overview.md │ │ │ ├── interns1.md │ │ │ ├── metrics.md │ │ │ ├── models.md │ │ │ └── summarizer.md │ └── zh_cn │ │ ├── .readthedocs.yaml │ │ ├── Makefile │ │ ├── _static │ │ ├── css │ │ │ └── readthedocs.css │ │ ├── image │ │ │ ├── logo.svg │ │ │ └── logo_icon.svg │ │ └── js │ │ │ └── custom.js │ │ ├── _templates │ │ ├── 404.html │ │ ├── autosummary │ │ │ └── class.rst │ │ └── callable.rst │ │ ├── advanced_guides │ │ ├── accelerator_intro.md │ │ ├── circular_eval.md │ │ ├── code_eval.md │ │ ├── code_eval_service.md │ │ ├── compassbench_intro.md │ │ ├── compassbench_v2_0.md │ │ ├── contamination_eval.md │ │ ├── custom_dataset.md │ │ ├── evaluation_lightllm.md │ │ ├── evaluation_lmdeploy.md │ │ ├── llm_judge.md │ │ ├── longeval.md │ │ ├── math_verify.md │ │ ├── needleinahaystack_eval.md │ │ ├── new_dataset.md │ │ ├── new_model.md │ │ ├── objective_judgelm_evaluation.md │ │ ├── persistence.md │ │ ├── prompt_attack.md │ │ └── subjective_evaluation.md │ │ ├── conf.py │ │ ├── cp_origin_docs.sh │ │ ├── docutils.conf │ │ ├── get_started │ │ ├── faq.md │ │ ├── installation.md │ │ └── quick_start.md │ │ ├── index.rst │ │ ├── notes │ │ ├── academic.md │ │ ├── contribution_guide.md │ │ └── news.md │ │ ├── prompt │ │ ├── chain_of_thought.md │ │ ├── meta_template.md │ │ ├── overview.md │ │ └── prompt_template.md │ │ ├── statis.py │ │ ├── tools.md │ │ └── user_guides │ │ ├── config.md │ │ ├── corebench.md │ │ ├── datasets.md │ │ ├── deepseek_r1.md │ │ ├── evaluation.md │ │ ├── experimentation.md │ │ ├── framework_overview.md │ │ ├── interns1.md │ │ ├── metrics.md │ │ ├── models.md │ │ └── summarizer.md │ ├── examples │ ├── eval_OlympiadBench.py │ ├── eval_PMMEval.py │ ├── eval_TheoremQA.py │ ├── eval_academic_leaderboard_202407.py │ ├── eval_academic_leaderboard_202412.py │ ├── eval_academic_leaderboard_202502.py │ ├── eval_academic_leaderboard_REALTIME.py │ ├── eval_alaya.py │ ├── eval_api_demo.py │ ├── eval_attack.py │ ├── eval_babilong.py │ ├── eval_base_demo.py │ ├── eval_bench_intern_s1.py │ ├── eval_bluelm_32k_lveval.py │ ├── eval_cascade_evaluator.py │ ├── eval_charm_mem.py │ ├── eval_charm_rea.py │ ├── eval_chat_agent.py │ ├── eval_chat_agent_baseline.py │ ├── eval_chat_demo.py │ ├── eval_chat_last.py │ ├── eval_chembench.py │ ├── eval_chinese_simpleqa.py │ ├── eval_cibench.py │ ├── eval_cibench_api.py │ ├── eval_circular.py │ ├── eval_claude.py │ ├── eval_code_passk.py │ ├── eval_code_passk_repeat_dataset.py │ ├── eval_codeagent.py │ ├── eval_codebench_full.py │ ├── eval_codegeex2.py │ ├── eval_compassarena_subjectivebench.py │ ├── eval_compassarena_subjectivebench_bradleyterry.py │ ├── eval_contamination.py │ ├── eval_corebench_2409_base_objective.py │ ├── eval_corebench_2409_chat_objective.py │ ├── eval_corebench_2409_longcontext.py │ ├── eval_corebench_2409_subjective.py │ ├── eval_deepseek_r1.py │ ├── eval_dingo.py │ ├── eval_ds1000_interpreter.py │ ├── eval_edgellm_demo.py │ ├── eval_eese_api_judge.py │ ├── eval_gpt3.5.py │ ├── eval_gpt4.py │ ├── eval_hellobench.py │ ├── eval_hf_llama2.py │ ├── eval_hf_llama_7b.py │ ├── eval_inference_ppl.py │ ├── eval_internLM.py │ ├── eval_internlm2_chat_keyset.py │ ├── eval_internlm2_keyset.py │ ├── eval_internlm3_math500_thinking.py │ ├── eval_internlm_7b.py │ ├── eval_internlm_chat_lmdeploy_apiserver.py │ ├── eval_internlm_chat_turbomind.py │ ├── eval_internlm_flames_chat.py │ ├── eval_internlm_lmdeploy_apiserver.py │ ├── eval_internlm_math_chat.py │ ├── eval_internlm_turbomind.py │ ├── eval_judge_dataset_all.py │ ├── eval_judgebench.py │ ├── eval_judgerbench.py │ ├── eval_judgerbenchv2.py │ ├── eval_korbench.py │ ├── eval_lightllm.py │ ├── eval_livestembench.py │ ├── eval_llama2_7b.py │ ├── eval_llama2_7b_lveval.py │ ├── eval_llama3_instruct.py │ ├── eval_llm_compression.py │ ├── eval_llm_judge.py │ ├── eval_lmdeploy_demo.py │ ├── eval_longbenchv2.py │ ├── eval_math_llm_judge.py │ ├── eval_math_llm_judge_internal.py │ ├── eval_math_verify.py │ ├── eval_mathbench.py │ ├── eval_mmlu_cf.py │ ├── eval_mmlu_pro.py │ ├── eval_mmlu_with_zero_retriever_overwritten.py │ ├── eval_modelscope_datasets.py │ ├── eval_multi_prompt_demo.py │ ├── eval_musr.py │ ├── eval_needlebench_v2.py │ ├── eval_qwen3.py │ ├── eval_qwen_7b.py │ ├── eval_qwen_7b_chat.py │ ├── eval_qwen_7b_chat_lawbench.py │ ├── eval_rewardbench.py │ ├── eval_rmb.py │ ├── eval_ruler.py │ ├── eval_ruler_fix_tokenizer.py │ ├── eval_rwkv5_3b.py │ ├── eval_simpleqa.py │ ├── eval_subjective.py │ ├── eval_subjective_alpacaeval_official.py │ ├── eval_subjective_bradleyterry.py │ ├── eval_teval.py │ └── eval_with_model_dataset_combinations.py │ ├── opencompass │ ├── __init__.py │ ├── cli │ │ ├── __init__.py │ │ └── main.py │ ├── configs │ │ ├── dataset_collections │ │ │ └── chat_OC15.py │ │ ├── datasets │ │ │ ├── ARC_Prize_Public_Evaluation │ │ │ │ ├── README.md │ │ │ │ ├── arc_prize_public_evaluation_gen.py │ │ │ │ ├── arc_prize_public_evaluation_gen_872059.py │ │ │ │ └── arc_prize_public_evaluation_gen_fedd04.py │ │ │ ├── ARC_c │ │ │ │ ├── ARC_c_clean_ppl.py │ │ │ │ ├── ARC_c_cot_gen_926652.py │ │ │ │ ├── ARC_c_few_shot_gen_e9b043.py │ │ │ │ ├── ARC_c_few_shot_ppl.py │ │ │ │ ├── ARC_c_gen.py │ │ │ │ ├── ARC_c_gen_1e0de5.py │ │ │ │ ├── ARC_c_ppl.py │ │ │ │ ├── ARC_c_ppl_2ef631.py │ │ │ │ ├── ARC_c_ppl_a450bd.py │ │ │ │ └── ARC_c_ppl_d52a21.py │ │ │ ├── ARC_e │ │ │ │ ├── ARC_e_gen.py │ │ │ │ ├── ARC_e_gen_1e0de5.py │ │ │ │ ├── ARC_e_ppl.py │ │ │ │ ├── ARC_e_ppl_2ef631.py │ │ │ │ ├── ARC_e_ppl_a450bd.py │ │ │ │ └── ARC_e_ppl_d52a21.py │ │ │ ├── CARDBiomedBench │ │ │ │ └── CARDBiomedBench_llmjudge_gen_99a231.py │ │ │ ├── CHARM │ │ │ │ ├── README.md │ │ │ │ ├── README_ZH.md │ │ │ │ ├── charm_memory_gen_bbbd53.py │ │ │ │ ├── charm_memory_settings.py │ │ │ │ ├── charm_reason_cot_only_gen_f7b7d3.py │ │ │ │ ├── charm_reason_gen.py │ │ │ │ ├── charm_reason_gen_f8fca2.py │ │ │ │ ├── charm_reason_ppl_3da4de.py │ │ │ │ └── charm_reason_settings.py │ │ │ ├── CIBench │ │ │ │ ├── CIBench_generation_gen_8ab0dc.py │ │ │ │ ├── CIBench_generation_oracle_gen_c4a7c1.py │ │ │ │ ├── CIBench_template_gen_e6b12a.py │ │ │ │ └── CIBench_template_oracle_gen_fecda1.py │ │ │ ├── CLUE_C3 │ │ │ │ ├── CLUE_C3_gen.py │ │ │ │ ├── CLUE_C3_gen_8c358f.py │ │ │ │ ├── CLUE_C3_ppl.py │ │ │ │ ├── CLUE_C3_ppl_56b537.py │ │ │ │ └── CLUE_C3_ppl_e24a31.py │ │ │ ├── CLUE_CMRC │ │ │ │ ├── CLUE_CMRC_gen.py │ │ │ │ ├── CLUE_CMRC_gen_1bd3c8.py │ │ │ │ ├── CLUE_CMRC_gen_3749cd.py │ │ │ │ ├── CLUE_CMRC_gen_8484b9.py │ │ │ │ └── CLUE_CMRC_gen_941108.py │ │ │ ├── CLUE_DRCD │ │ │ │ ├── CLUE_DRCD_gen.py │ │ │ │ ├── CLUE_DRCD_gen_1bd3c8.py │ │ │ │ ├── CLUE_DRCD_gen_3749cd.py │ │ │ │ ├── CLUE_DRCD_gen_8484b9.py │ │ │ │ └── CLUE_DRCD_gen_941108.py │ │ │ ├── CLUE_afqmc │ │ │ │ ├── CLUE_afqmc_gen.py │ │ │ │ ├── CLUE_afqmc_gen_901306.py │ │ │ │ ├── CLUE_afqmc_ppl.py │ │ │ │ ├── CLUE_afqmc_ppl_378c5b.py │ │ │ │ ├── CLUE_afqmc_ppl_6507d7.py │ │ │ │ └── CLUE_afqmc_ppl_7b0c1e.py │ │ │ ├── CLUE_cmnli │ │ │ │ ├── CLUE_cmnli_gen.py │ │ │ │ ├── CLUE_cmnli_gen_1abf97.py │ │ │ │ ├── CLUE_cmnli_gen_51e956.py │ │ │ │ ├── CLUE_cmnli_ppl.py │ │ │ │ ├── CLUE_cmnli_ppl_98dd6e.py │ │ │ │ ├── CLUE_cmnli_ppl_ef69e7.py │ │ │ │ └── CLUE_cmnli_ppl_fdc6de.py │ │ │ ├── CLUE_ocnli │ │ │ │ ├── CLUE_ocnli_gen.py │ │ │ │ ├── CLUE_ocnli_gen_51e956.py │ │ │ │ ├── CLUE_ocnli_gen_c4cb6c.py │ │ │ │ ├── CLUE_ocnli_ppl.py │ │ │ │ ├── CLUE_ocnli_ppl_98dd6e.py │ │ │ │ ├── CLUE_ocnli_ppl_ef69e7.py │ │ │ │ └── CLUE_ocnli_ppl_fdc6de.py │ │ │ ├── ChemBench │ │ │ │ ├── ChemBench_gen.py │ │ │ │ ├── ChemBench_gen_a9f753.py │ │ │ │ ├── ChemBench_llmjudge_gen.py │ │ │ │ └── ChemBench_llmjudge_gen_c584cf.py │ │ │ ├── ClimaQA │ │ │ │ ├── ClimaQA_Gold_llm_judge_gen.py │ │ │ │ ├── ClimaQA_Gold_llm_judge_gen_f15343.py │ │ │ │ ├── ClimaQA_Silver_llm_judge_gen.py │ │ │ │ └── ClimaQA_Silver_llm_judge_gen_f15343.py │ │ │ ├── ClinicBench │ │ │ │ ├── ClinicBench_llmjudge_gen.py │ │ │ │ └── ClinicBench_llmjudge_gen_d09668.py │ │ │ ├── Earth_Silver │ │ │ │ ├── Earth_Silver_gen.py │ │ │ │ ├── Earth_Silver_llmjudge_gen.py │ │ │ │ └── Earth_Silver_llmjudge_gen_46140c.py │ │ │ ├── FewCLUE_bustm │ │ │ │ ├── FewCLUE_bustm_gen.py │ │ │ │ ├── FewCLUE_bustm_gen_634f41.py │ │ │ │ ├── FewCLUE_bustm_ppl.py │ │ │ │ ├── FewCLUE_bustm_ppl_4b16c0.py │ │ │ │ ├── FewCLUE_bustm_ppl_9ef540.py │ │ │ │ └── FewCLUE_bustm_ppl_e53034.py │ │ │ ├── FewCLUE_chid │ │ │ │ ├── FewCLUE_chid_gen.py │ │ │ │ ├── FewCLUE_chid_gen_0a29a2.py │ │ │ │ ├── FewCLUE_chid_ppl.py │ │ │ │ ├── FewCLUE_chid_ppl_8f2872.py │ │ │ │ └── FewCLUE_chid_ppl_acccb5.py │ │ │ ├── FewCLUE_cluewsc │ │ │ │ ├── FewCLUE_cluewsc_gen.py │ │ │ │ ├── FewCLUE_cluewsc_gen_c68933.py │ │ │ │ ├── FewCLUE_cluewsc_ppl.py │ │ │ │ ├── FewCLUE_cluewsc_ppl_12e4e0.py │ │ │ │ ├── FewCLUE_cluewsc_ppl_4284a0.py │ │ │ │ └── FewCLUE_cluewsc_ppl_868415.py │ │ │ ├── FewCLUE_csl │ │ │ │ ├── FewCLUE_csl_gen.py │ │ │ │ ├── FewCLUE_csl_gen_28b223.py │ │ │ │ ├── FewCLUE_csl_gen_87f4a8.py │ │ │ │ ├── FewCLUE_csl_ppl.py │ │ │ │ ├── FewCLUE_csl_ppl_769f8d.py │ │ │ │ └── FewCLUE_csl_ppl_841b62.py │ │ │ ├── FewCLUE_eprstmt │ │ │ │ ├── FewCLUE_eprstmt_gen.py │ │ │ │ ├── FewCLUE_eprstmt_gen_740ea0.py │ │ │ │ ├── FewCLUE_eprstmt_ppl.py │ │ │ │ ├── FewCLUE_eprstmt_ppl_1ce587.py │ │ │ │ └── FewCLUE_eprstmt_ppl_f1e631.py │ │ │ ├── FewCLUE_ocnli_fc │ │ │ │ ├── FewCLUE_ocnli_fc_gen.py │ │ │ │ ├── FewCLUE_ocnli_fc_gen_f97a97.py │ │ │ │ ├── FewCLUE_ocnli_fc_ppl.py │ │ │ │ ├── FewCLUE_ocnli_fc_ppl_9e8b3d.py │ │ │ │ └── FewCLUE_ocnli_fc_ppl_c08300.py │ │ │ ├── FewCLUE_tnews │ │ │ │ ├── FewCLUE_tnews_gen.py │ │ │ │ ├── FewCLUE_tnews_gen_b90e4a.py │ │ │ │ ├── FewCLUE_tnews_ppl.py │ │ │ │ ├── FewCLUE_tnews_ppl_7d1c07.py │ │ │ │ ├── FewCLUE_tnews_ppl_d10e8a.py │ │ │ │ └── FewCLUE_tnews_ppl_fff486.py │ │ │ ├── FinanceIQ │ │ │ │ ├── FinanceIQ_gen.py │ │ │ │ ├── FinanceIQ_gen_e0e6b5.py │ │ │ │ ├── FinanceIQ_ppl.py │ │ │ │ └── FinanceIQ_ppl_42b9bd.py │ │ │ ├── GLUE_CoLA │ │ │ │ ├── GLUE_CoLA_ppl.py │ │ │ │ └── GLUE_CoLA_ppl_77d0df.py │ │ │ ├── GLUE_MRPC │ │ │ │ ├── GLUE_MRPC_ppl.py │ │ │ │ └── GLUE_MRPC_ppl_96564c.py │ │ │ ├── GLUE_QQP │ │ │ │ ├── GLUE_QQP_ppl.py │ │ │ │ └── GLUE_QQP_ppl_250d00.py │ │ │ ├── GaokaoBench │ │ │ │ ├── GaokaoBench_gen.py │ │ │ │ ├── GaokaoBench_gen_5cfe9e.py │ │ │ │ ├── GaokaoBench_mixed.py │ │ │ │ ├── GaokaoBench_mixed_9af5ee.py │ │ │ │ ├── GaokaoBench_no_subjective_gen_4c31db.py │ │ │ │ ├── GaokaoBench_no_subjective_gen_d16acb.py │ │ │ │ ├── GaokaoBench_no_subjective_gen_d21e37.py │ │ │ │ ├── GaokaoBench_prompts.py │ │ │ │ └── README.md │ │ │ ├── HLE │ │ │ │ ├── hle_biomed_llm_verify_gen_6ff468.py │ │ │ │ ├── hle_gen.py │ │ │ │ ├── hle_llmverify_academic.py │ │ │ │ └── hle_llmverify_gen_6ff468.py │ │ │ ├── HealthBench │ │ │ │ └── healthbench_gen_831613.py │ │ │ ├── IFEval │ │ │ │ ├── IFEval.md │ │ │ │ ├── IFEval_gen.py │ │ │ │ ├── IFEval_gen_3321a3.py │ │ │ │ ├── IFEval_gen_353ae7.py │ │ │ │ └── README.md │ │ │ ├── LCBench │ │ │ │ ├── README.md │ │ │ │ ├── lcbench_gen.py │ │ │ │ ├── lcbench_gen_5ff288.py │ │ │ │ ├── lcbench_levels_gen_bb665f.py │ │ │ │ ├── lcbench_repeat10_gen.py │ │ │ │ └── lcbench_repeat10_gen_5ff288.py │ │ │ ├── MMLUArabic │ │ │ │ ├── MMLUArabic_gen.py │ │ │ │ ├── MMLUArabic_gen_326684.py │ │ │ │ ├── MMLUArabic_ppl.py │ │ │ │ ├── MMLUArabic_ppl_d2333a.py │ │ │ │ ├── MMLUArabic_zero_shot_gen.py │ │ │ │ ├── MMLUArabic_zero_shot_gen_3523e0.py │ │ │ │ └── README.md │ │ │ ├── MathBench │ │ │ │ ├── deprecated_mathbench_2024_gen_de9ff9.py │ │ │ │ ├── deprecated_mathbench_agent_gen_48ec47.py │ │ │ │ ├── deprecated_mathbench_agent_gen_fbe13b.py │ │ │ │ ├── deprecated_mathbench_arith_gen_ccd638.py │ │ │ │ ├── deprecated_mathbench_cot_gen_66f329.py │ │ │ │ ├── deprecated_mathbench_gen_7b734b.py │ │ │ │ ├── mathbench_2024_few_shot_mixed_4a3fd4.py │ │ │ │ ├── mathbench_2024_gen_19e486.py │ │ │ │ ├── mathbench_2024_gen_1dc21d.py │ │ │ │ ├── mathbench_2024_gen_4b8f28.py │ │ │ │ ├── mathbench_2024_gen_50a320.py │ │ │ │ ├── mathbench_2024_gen_fc2a24.py │ │ │ │ ├── mathbench_2024_wocircular_gen_1dc21d.py │ │ │ │ ├── mathbench_2024_wocircular_mixed_8eb12b.py │ │ │ │ ├── mathbench_gen.py │ │ │ │ └── mathbench_prompt.py │ │ │ ├── MedBench │ │ │ │ ├── medbench_gen.py │ │ │ │ └── medbench_gen_0b4fff.py │ │ │ ├── MedCalc_Bench │ │ │ │ └── MedCalcBench_official_gen_a5155f.py │ │ │ ├── MedQA │ │ │ │ ├── MedQA_gen_3bf756.py │ │ │ │ └── MedQA_llmjudge_gen_3bf756.py │ │ │ ├── MedXpertQA │ │ │ │ ├── MedXpertQA_gen.py │ │ │ │ └── MedXpertQA_llmjudge_gen.py │ │ │ ├── Medbullets │ │ │ │ ├── medbullets_gen.py │ │ │ │ ├── medbullets_gen_60c8f5.py │ │ │ │ ├── medbullets_llmjudge_gen.py │ │ │ │ └── medbullets_llmjudge_gen_60c8f5.py │ │ │ ├── NPHardEval │ │ │ │ ├── NPHardEval_gen.py │ │ │ │ ├── NPHardEval_gen_22aac5.py │ │ │ │ └── README.md │ │ │ ├── OlymMATH │ │ │ │ ├── README.md │ │ │ │ ├── olymmath_cascade_eval_gen_97b203.py │ │ │ │ ├── olymmath_llm_judeg_gen.py │ │ │ │ └── olymmath_llmverify_gen_97b203.py │ │ │ ├── OlympiadBench │ │ │ │ ├── OlympiadBenchMath_0shot_llmverify_gen_9c22f2.py │ │ │ │ ├── OlympiadBench_0shot_cascade_eval_gen_be8b13.py │ │ │ │ ├── OlympiadBench_0shot_gen_be8b13.py │ │ │ │ ├── OlympiadBench_0shot_llmverify_gen_be8b13.py │ │ │ │ └── OlympiadBench_categories.py │ │ │ ├── OpenFinData │ │ │ │ ├── OpenFinData_gen.py │ │ │ │ ├── OpenFinData_gen_46dedb.py │ │ │ │ └── README.md │ │ │ ├── PHYBench │ │ │ │ └── phybench_gen.py │ │ │ ├── PHYSICS │ │ │ │ ├── PHYSICS_llm_judge_gen.py │ │ │ │ └── PHYSICS_llm_judge_gen_a133a2.py │ │ │ ├── PJExam │ │ │ │ ├── PJExam_gen.py │ │ │ │ └── PJExam_gen_8cd97c.py │ │ │ ├── PMMEval │ │ │ │ ├── flores_gen.py │ │ │ │ ├── flores_gen_2697d7.py │ │ │ │ ├── humanevalxl_gen.py │ │ │ │ ├── humanevalxl_gen_bdec92.py │ │ │ │ ├── mgsm_gen.py │ │ │ │ ├── mgsm_gen_679720.py │ │ │ │ ├── mhellaswag_gen.py │ │ │ │ ├── mhellaswag_gen_1a6b73.py │ │ │ │ ├── mifeval_gen.py │ │ │ │ ├── mifeval_gen_79f8fb.py │ │ │ │ ├── mlogiqa_gen.py │ │ │ │ ├── mlogiqa_gen_36c4f9.py │ │ │ │ ├── mmmlu_gen.py │ │ │ │ ├── mmmlu_gen_d5017d.py │ │ │ │ ├── pmmeval_gen.py │ │ │ │ ├── xnli_gen.py │ │ │ │ └── xnli_gen_973734.py │ │ │ ├── ProteinLMBench │ │ │ │ ├── ProteinLMBench_gen_a67965.py │ │ │ │ └── ProteinLMBench_llmjudge_gen_a67965.py │ │ │ ├── PubMedQA │ │ │ │ ├── PubMedQA_llmjudge_gen.py │ │ │ │ └── PubMedQA_llmjudge_gen_f00302.py │ │ │ ├── QuALITY │ │ │ │ ├── QuALITY.md │ │ │ │ ├── QuALITY_gen.py │ │ │ │ └── QuALITY_gen_c407cb.py │ │ │ ├── R_Bench │ │ │ │ ├── R-Bench.md │ │ │ │ ├── rbench_gen_544610.py │ │ │ │ └── rbench_llmjudge_gen_c89350.py │ │ │ ├── SVAMP │ │ │ │ ├── svamp_gen.py │ │ │ │ └── svamp_gen_fb25e4.py │ │ │ ├── SciEval │ │ │ │ ├── SciEval_5shot_gen_4043d4.py │ │ │ │ ├── SciEval_5shot_llmjudge_gen_b7b684.py │ │ │ │ └── SciEval_lifescience_sets.py │ │ │ ├── SciKnowEval │ │ │ │ ├── SciKnowEval_gen_ebe47d.py │ │ │ │ └── SciKnowEval_llmjudge_gen_ebe47d.py │ │ │ ├── ScienceQA │ │ │ │ ├── ScienceQA_llmjudge_gen.py │ │ │ │ └── ScienceQA_llmjudge_gen_f00302.py │ │ │ ├── SeedBench │ │ │ │ ├── README.md │ │ │ │ ├── seedbench_gen.py │ │ │ │ └── seedbench_gen_5d5ea1.py │ │ │ ├── SimpleQA │ │ │ │ ├── README.md │ │ │ │ ├── simpleqa_gen.py │ │ │ │ └── simpleqa_gen_0283c3.py │ │ │ ├── SmolInstruct │ │ │ │ ├── smolinstruct_0shot_instruct_gen.py │ │ │ │ ├── smolinstruct_fts_0shot_instruct.py │ │ │ │ ├── smolinstruct_fts_gen_5774b5.py │ │ │ │ ├── smolinstruct_gen.py │ │ │ │ ├── smolinstruct_meteor_0shot_instruct.py │ │ │ │ ├── smolinstruct_meteor_gen_065150.py │ │ │ │ ├── smolinstruct_nc_0shot_instruct.py │ │ │ │ ├── smolinstruct_nc_gen_c84c18.py │ │ │ │ ├── smolinstruct_pp_acc_0_shot_instruct.py │ │ │ │ ├── smolinstruct_pp_acc_gen_8607a3.py │ │ │ │ ├── smolinstruct_rmse_0shot_instruct.py │ │ │ │ └── smolinstruct_rmse_gen_0fcc6b.py │ │ │ ├── SuperGLUE_AX_b │ │ │ │ ├── SuperGLUE_AX_b_gen.py │ │ │ │ ├── SuperGLUE_AX_b_gen_4dfefa.py │ │ │ │ ├── SuperGLUE_AX_b_ppl.py │ │ │ │ ├── SuperGLUE_AX_b_ppl_0748aa.py │ │ │ │ └── SuperGLUE_AX_b_ppl_6db806.py │ │ │ ├── SuperGLUE_AX_g │ │ │ │ ├── SuperGLUE_AX_g_gen.py │ │ │ │ ├── SuperGLUE_AX_g_gen_68aac7.py │ │ │ │ ├── SuperGLUE_AX_g_ppl.py │ │ │ │ ├── SuperGLUE_AX_g_ppl_50f8f6.py │ │ │ │ └── SuperGLUE_AX_g_ppl_66caf3.py │ │ │ ├── SuperGLUE_BoolQ │ │ │ │ ├── SuperGLUE_BoolQ_cot_gen_1d56df.py │ │ │ │ ├── SuperGLUE_BoolQ_few_shot_gen_ba58ea.py │ │ │ │ ├── SuperGLUE_BoolQ_few_shot_ppl.py │ │ │ │ ├── SuperGLUE_BoolQ_gen.py │ │ │ │ ├── SuperGLUE_BoolQ_gen_883d50.py │ │ │ │ ├── SuperGLUE_BoolQ_ppl.py │ │ │ │ ├── SuperGLUE_BoolQ_ppl_16b1d9.py │ │ │ │ ├── SuperGLUE_BoolQ_ppl_314797.py │ │ │ │ ├── SuperGLUE_BoolQ_ppl_314b96.py │ │ │ │ ├── SuperGLUE_BoolQ_ppl_4da4db.py │ │ │ │ └── SuperGLUE_BoolQ_ppl_9619db.py │ │ │ ├── SuperGLUE_CB │ │ │ │ ├── SuperGLUE_CB_gen.py │ │ │ │ ├── SuperGLUE_CB_gen_854c6c.py │ │ │ │ ├── SuperGLUE_CB_ppl.py │ │ │ │ ├── SuperGLUE_CB_ppl_0143fe.py │ │ │ │ └── SuperGLUE_CB_ppl_11c175.py │ │ │ ├── SuperGLUE_COPA │ │ │ │ ├── SuperGLUE_COPA_gen.py │ │ │ │ ├── SuperGLUE_COPA_gen_91ca53.py │ │ │ │ ├── SuperGLUE_COPA_ppl.py │ │ │ │ ├── SuperGLUE_COPA_ppl_54058d.py │ │ │ │ ├── SuperGLUE_COPA_ppl_5c24f1.py │ │ │ │ └── SuperGLUE_COPA_ppl_9f3618.py │ │ │ ├── SuperGLUE_MultiRC │ │ │ │ ├── SuperGLUE_MultiRC_gen.py │ │ │ │ ├── SuperGLUE_MultiRC_gen_27071f.py │ │ │ │ ├── SuperGLUE_MultiRC_ppl.py │ │ │ │ ├── SuperGLUE_MultiRC_ppl_866273.py │ │ │ │ └── SuperGLUE_MultiRC_ppl_ced824.py │ │ │ ├── SuperGLUE_RTE │ │ │ │ ├── SuperGLUE_RTE_gen.py │ │ │ │ ├── SuperGLUE_RTE_gen_68aac7.py │ │ │ │ ├── SuperGLUE_RTE_ppl.py │ │ │ │ ├── SuperGLUE_RTE_ppl_50f8f6.py │ │ │ │ └── SuperGLUE_RTE_ppl_66caf3.py │ │ │ ├── SuperGLUE_ReCoRD │ │ │ │ ├── SuperGLUE_ReCoRD_gen.py │ │ │ │ ├── SuperGLUE_ReCoRD_gen_0f7784.py │ │ │ │ ├── SuperGLUE_ReCoRD_gen_30dea0.py │ │ │ │ └── SuperGLUE_ReCoRD_gen_a69961.py │ │ │ ├── SuperGLUE_WSC │ │ │ │ ├── SuperGLUE_WSC_gen.py │ │ │ │ ├── SuperGLUE_WSC_gen_7902a7.py │ │ │ │ ├── SuperGLUE_WSC_gen_fe4bf3.py │ │ │ │ ├── SuperGLUE_WSC_ppl.py │ │ │ │ ├── SuperGLUE_WSC_ppl_003529.py │ │ │ │ ├── SuperGLUE_WSC_ppl_1c4a90.py │ │ │ │ ├── SuperGLUE_WSC_ppl_d0f531.py │ │ │ │ └── SuperGLUE_WSC_ppl_f37e78.py │ │ │ ├── SuperGLUE_WiC │ │ │ │ ├── SuperGLUE_WiC_gen.py │ │ │ │ ├── SuperGLUE_WiC_gen_d06864.py │ │ │ │ ├── SuperGLUE_WiC_ppl.py │ │ │ │ ├── SuperGLUE_WiC_ppl_312de9.py │ │ │ │ ├── SuperGLUE_WiC_ppl_3fb6fd.py │ │ │ │ └── SuperGLUE_WiC_ppl_c926be.py │ │ │ ├── TabMWP │ │ │ │ ├── TabMWP_gen.py │ │ │ │ └── TabMWP_gen_2aef96.py │ │ │ ├── TheoremQA │ │ │ │ ├── README.md │ │ │ │ ├── TheoremQA_5shot_gen_6f0af8.py │ │ │ │ ├── TheoremQA_few_shot_examples.py │ │ │ │ ├── TheoremQA_few_shot_examples_official.py │ │ │ │ ├── TheoremQA_gen.py │ │ │ │ ├── ThroremQA_0shot_cot_gen_8acdf7.py │ │ │ │ ├── deprecated_TheoremQA_gen_424e0a.py │ │ │ │ ├── deprecated_TheoremQA_gen_7009de.py │ │ │ │ ├── deprecated_TheoremQA_gen_ef26ca.py │ │ │ │ ├── deprecated_TheoremQA_post_v2_gen_2c2583.py │ │ │ │ └── deprecated_TheoremQA_post_v2_gen_ef26ca.py │ │ │ ├── XCOPA │ │ │ │ ├── XCOPA_ppl.py │ │ │ │ └── XCOPA_ppl_54058d.py │ │ │ ├── XLSum │ │ │ │ ├── XLSum_gen.py │ │ │ │ └── XLSum_gen_2bb71c.py │ │ │ ├── Xsum │ │ │ │ ├── Xsum_gen.py │ │ │ │ ├── Xsum_gen_31397e.py │ │ │ │ └── Xsum_gen_8ea5f8.py │ │ │ ├── adv_glue │ │ │ │ ├── __init__.py │ │ │ │ ├── adv_glue_mnli │ │ │ │ │ ├── adv_glue_mnli_gen.py │ │ │ │ │ └── adv_glue_mnli_gen_bd8ef0.py │ │ │ │ ├── adv_glue_mnli_mm │ │ │ │ │ ├── adv_glue_mnli_mm_gen.py │ │ │ │ │ └── adv_glue_mnli_mm_gen_bd8ef0.py │ │ │ │ ├── adv_glue_qnli │ │ │ │ │ ├── adv_glue_qnli_gen.py │ │ │ │ │ └── adv_glue_qnli_gen_0b7326.py │ │ │ │ ├── adv_glue_qqp │ │ │ │ │ ├── adv_glue_qqp_gen.py │ │ │ │ │ └── adv_glue_qqp_gen_cdc277.py │ │ │ │ ├── adv_glue_rte │ │ │ │ │ ├── adv_glue_rte_gen.py │ │ │ │ │ └── adv_glue_rte_gen_8cc547.py │ │ │ │ └── adv_glue_sst2 │ │ │ │ │ ├── adv_glue_sst2_gen.py │ │ │ │ │ └── adv_glue_sst2_gen_ee8d3b.py │ │ │ ├── agieval │ │ │ │ ├── agieval_gen.py │ │ │ │ ├── agieval_gen_397d81.py │ │ │ │ ├── agieval_gen_617738.py │ │ │ │ ├── agieval_gen_64afd3.py │ │ │ │ ├── agieval_gen_a0c741.py │ │ │ │ ├── agieval_mixed.py │ │ │ │ └── agieval_mixed_0fa998.py │ │ │ ├── aime2024 │ │ │ │ ├── README.md │ │ │ │ ├── aime2024_0shot_nocot_gen_2b9dc2.py │ │ │ │ ├── aime2024_0shot_nocot_genericllmeval_academic_gen.py │ │ │ │ ├── aime2024_0shot_nocot_genericllmeval_gen_2b9dc2.py │ │ │ │ ├── aime2024_cascade_eval_gen_5e9f4f.py │ │ │ │ ├── aime2024_gen.py │ │ │ │ ├── aime2024_gen_17d799.py │ │ │ │ ├── aime2024_gen_6e39a4.py │ │ │ │ ├── aime2024_llmjudge_gen.py │ │ │ │ ├── aime2024_llmjudge_gen_5e9f4f.py │ │ │ │ ├── aime2024_llmverify_repeat16_gen_bf7475.py │ │ │ │ └── aime2024_llmverify_repeat8_gen_e8fcee.py │ │ │ ├── aime2025 │ │ │ │ ├── aime2025_cascade_eval_gen_5e9f4f.py │ │ │ │ ├── aime2025_llmjudge_academic.py │ │ │ │ └── aime2025_llmjudge_gen_5e9f4f.py │ │ │ ├── anli │ │ │ │ ├── anli_gen.py │ │ │ │ ├── anli_gen_fc7328.py │ │ │ │ ├── anli_ppl.py │ │ │ │ └── anli_ppl_1d290e.py │ │ │ ├── anthropics_evals │ │ │ │ ├── airisk_gen.py │ │ │ │ ├── airisk_gen_ba66fc.py │ │ │ │ ├── persona_gen.py │ │ │ │ ├── persona_gen_cc72e2.py │ │ │ │ ├── sycophancy_gen.py │ │ │ │ └── sycophancy_gen_4bba45.py │ │ │ ├── apps │ │ │ │ ├── README.md │ │ │ │ ├── apps_gen.py │ │ │ │ ├── apps_gen_c7893a.py │ │ │ │ ├── apps_mini_gen.py │ │ │ │ ├── apps_mini_gen_c7893a.py │ │ │ │ ├── deprecated_apps_gen_5b4254.py │ │ │ │ ├── deprecated_apps_gen_7fbb95.py │ │ │ │ └── deprecated_apps_gen_b4dee3.py │ │ │ ├── babilong │ │ │ │ ├── README.md │ │ │ │ ├── babilong_0k_gen.py │ │ │ │ ├── babilong_128k_gen.py │ │ │ │ ├── babilong_16k_gen.py │ │ │ │ ├── babilong_1m_gen.py │ │ │ │ ├── babilong_256k_gen.py │ │ │ │ ├── babilong_2k_gen.py │ │ │ │ ├── babilong_32k_gen.py │ │ │ │ └── babilong_4k_gen.py │ │ │ ├── bbeh │ │ │ │ ├── README.md │ │ │ │ ├── bbeh_gen.py │ │ │ │ └── bbeh_llmjudge_gen_86c3a0.py │ │ │ ├── bbh │ │ │ │ ├── README.md │ │ │ │ ├── bbh_0shot_nocot_academic_gen.py │ │ │ │ ├── bbh_0shot_nocot_gen_925fc4.py │ │ │ │ ├── bbh_0shot_nocot_gen_9c32f6.py │ │ │ │ ├── bbh_0shot_nocot_gen_ea7952.py │ │ │ │ ├── bbh_gen.py │ │ │ │ ├── bbh_gen_2879b0.py │ │ │ │ ├── bbh_gen_4a31fa.py │ │ │ │ ├── bbh_gen_5b92b0.py │ │ │ │ ├── bbh_gen_5bf00b.py │ │ │ │ ├── bbh_gen_98fba6.py │ │ │ │ ├── bbh_gen_ee62e9.py │ │ │ │ ├── bbh_llm_judge_gen.py │ │ │ │ ├── bbh_llmjudge_gen_b5bdf1.py │ │ │ │ └── bbh_subset_settings.py │ │ │ ├── bigcodebench │ │ │ │ ├── bigcodebench_full_complete_gen.py │ │ │ │ ├── bigcodebench_full_complete_gen_faf748.py │ │ │ │ ├── bigcodebench_full_instruct_gen.py │ │ │ │ ├── bigcodebench_full_instruct_gen_8815eb.py │ │ │ │ ├── bigcodebench_full_instruct_repeat_gen_c3d5ad.py │ │ │ │ ├── bigcodebench_gen.py │ │ │ │ ├── bigcodebench_hard_complete_gen.py │ │ │ │ ├── bigcodebench_hard_complete_gen_2888d3.py │ │ │ │ ├── bigcodebench_hard_complete_gen_faf748.py │ │ │ │ ├── bigcodebench_hard_instruct_gen.py │ │ │ │ ├── bigcodebench_hard_instruct_gen_8815eb.py │ │ │ │ ├── bigcodebench_hard_instruct_gen_c3d5ad.py │ │ │ │ └── bigcodebench_hard_instruct_repeat_gen_c3d5ad.py │ │ │ ├── calm │ │ │ │ ├── README.md │ │ │ │ └── calm.py │ │ │ ├── ceval │ │ │ │ ├── README.md │ │ │ │ ├── ceval_clean_ppl.py │ │ │ │ ├── ceval_gen.py │ │ │ │ ├── ceval_gen_2daf24.py │ │ │ │ ├── ceval_gen_5f30c7.py │ │ │ │ ├── ceval_internal_ppl_1cd8bf.py │ │ │ │ ├── ceval_internal_ppl_93e5ce.py │ │ │ │ ├── ceval_ppl.py │ │ │ │ ├── ceval_ppl_1cd8bf.py │ │ │ │ ├── ceval_ppl_578f8d.py │ │ │ │ ├── ceval_ppl_93e5ce.py │ │ │ │ └── ceval_zero_shot_gen_bd40ef.py │ │ │ ├── chem_exam │ │ │ │ ├── competition_gen.py │ │ │ │ └── gaokao_gen.py │ │ │ ├── chinese_simpleqa │ │ │ │ ├── README.md │ │ │ │ └── chinese_simpleqa_gen.py │ │ │ ├── civilcomments │ │ │ │ ├── civilcomments_clp.py │ │ │ │ ├── civilcomments_clp_6a2561.py │ │ │ │ └── civilcomments_clp_a3c5fd.py │ │ │ ├── clozeTest_maxmin │ │ │ │ ├── clozeTest_maxmin_gen.py │ │ │ │ └── clozeTest_maxmin_gen_c205fb.py │ │ │ ├── cmb │ │ │ │ ├── cmb_gen.py │ │ │ │ └── cmb_gen_dfb5c4.py │ │ │ ├── cmmlu │ │ │ │ ├── cmmlu_0shot_cot_gen_305931.py │ │ │ │ ├── cmmlu_0shot_nocot_llmjudge_gen_e1cd9a.py │ │ │ │ ├── cmmlu_gen.py │ │ │ │ ├── cmmlu_gen_c13365.py │ │ │ │ ├── cmmlu_llm_judge_gen.py │ │ │ │ ├── cmmlu_llmjudge_gen_e1cd9a.py │ │ │ │ ├── cmmlu_ppl.py │ │ │ │ ├── cmmlu_ppl_041cbf.py │ │ │ │ ├── cmmlu_ppl_8b9c76.py │ │ │ │ ├── cmmlu_stem_0shot_nocot_gen_3653db.py │ │ │ │ ├── cmmlu_stem_0shot_nocot_llmjudge_gen_3653db.py │ │ │ │ └── cmmlu_stem_0shot_nocot_xml_gen_3653db.py │ │ │ ├── cmo_fib │ │ │ │ ├── README.md │ │ │ │ ├── cmo_fib_0shot_notcot_gen_4c6c29.py │ │ │ │ ├── cmo_fib_gen.py │ │ │ │ ├── cmo_fib_gen_2783e5.py │ │ │ │ └── cmo_fib_gen_ace24b.py │ │ │ ├── codecompass │ │ │ │ └── codecompass_gen_079a6c.py │ │ │ ├── collections │ │ │ │ ├── base_core.py │ │ │ │ ├── base_medium.py │ │ │ │ ├── base_medium_llama.py │ │ │ │ ├── base_small.py │ │ │ │ ├── chat_core.py │ │ │ │ ├── chat_medium.py │ │ │ │ ├── chat_small.py │ │ │ │ ├── example.py │ │ │ │ └── leaderboard │ │ │ │ │ ├── qwen.py │ │ │ │ │ └── qwen_chat.py │ │ │ ├── commonsenseqa │ │ │ │ ├── commonsenseqa_7shot_cot_gen_734a22.py │ │ │ │ ├── commonsenseqa_gen.py │ │ │ │ ├── commonsenseqa_gen_1da2d0.py │ │ │ │ ├── commonsenseqa_gen_c946f2.py │ │ │ │ ├── commonsenseqa_ppl.py │ │ │ │ ├── commonsenseqa_ppl_3e9f2d.py │ │ │ │ ├── commonsenseqa_ppl_5545e2.py │ │ │ │ ├── commonsenseqa_ppl_716f78.py │ │ │ │ ├── commonsenseqa_ppl_c49e77.py │ │ │ │ └── commonsenseqa_ppl_e51e32.py │ │ │ ├── commonsenseqa_cn │ │ │ │ ├── commonsenseqacn_gen.py │ │ │ │ ├── commonsenseqacn_gen_d380d0.py │ │ │ │ ├── commonsenseqacn_ppl.py │ │ │ │ └── commonsenseqacn_ppl_971f48.py │ │ │ ├── compassbench_20_v1_1 │ │ │ │ ├── agent │ │ │ │ │ ├── cibench_template_gen_e6b12a.py │ │ │ │ │ └── mus_teval_gen_105c48.py │ │ │ │ ├── code │ │ │ │ │ └── compassbench_v1_1_code_gen_986f01.py │ │ │ │ ├── knowledge │ │ │ │ │ └── compassbench_v1_knowledge_gen_bd74e0.py │ │ │ │ ├── language │ │ │ │ │ └── compassbench_v1_language_gen_7aa06d.py │ │ │ │ ├── math │ │ │ │ │ ├── compassbench_v1_1_math_gen_1dc21d.py │ │ │ │ │ └── mathbench_prompt.py │ │ │ │ └── reason │ │ │ │ │ └── compassbench_v1_reason_gen_d26d08.py │ │ │ ├── compassbench_20_v1_1_public │ │ │ │ ├── agent │ │ │ │ │ ├── cibench_template_gen_e6b12a.py │ │ │ │ │ └── mus_teval_gen_105c48.py │ │ │ │ ├── code │ │ │ │ │ └── compassbench_v1_1_code_gen_986f01.py │ │ │ │ ├── knowledge │ │ │ │ │ └── compassbench_v1_knowledge_gen_bd74e0.py │ │ │ │ ├── language │ │ │ │ │ └── compassbench_v1_language_gen_7aa06d.py │ │ │ │ ├── math │ │ │ │ │ ├── compassbench_v1_1_math_gen_1dc21d.py │ │ │ │ │ └── mathbench_prompt.py │ │ │ │ └── reason │ │ │ │ │ └── compassbench_v1_reason_gen_d26d08.py │ │ │ ├── compassbench_v1_3 │ │ │ │ ├── compassbench_v1_3_code_gen_c8c3aa.py │ │ │ │ ├── compassbench_v1_3_knowledge.py │ │ │ │ ├── compassbench_v1_3_math.py │ │ │ │ ├── compassbench_v1_3_objective_gen.py │ │ │ │ ├── compassbench_v1_3_objective_gen_068af0.py │ │ │ │ └── compassbench_v1_3_prompt.py │ │ │ ├── contamination │ │ │ │ ├── ceval_contamination_ppl_810ec6.py │ │ │ │ ├── mbpp_contamination_ppl_f01cb6.py │ │ │ │ └── mmlu_contamination_ppl_810ec6.py │ │ │ ├── crowspairs │ │ │ │ ├── crowspairs_gen.py │ │ │ │ ├── crowspairs_gen_02b6c1.py │ │ │ │ ├── crowspairs_gen_381af0.py │ │ │ │ ├── crowspairs_ppl.py │ │ │ │ ├── crowspairs_ppl_47f211.py │ │ │ │ └── crowspairs_ppl_e811e1.py │ │ │ ├── crowspairs_cn │ │ │ │ ├── crowspairscn_gen.py │ │ │ │ ├── crowspairscn_gen_556dc9.py │ │ │ │ ├── crowspairscn_ppl.py │ │ │ │ └── crowspairscn_ppl_f53575.py │ │ │ ├── cvalues │ │ │ │ ├── cvalues_responsibility_gen.py │ │ │ │ └── cvalues_responsibility_gen_543378.py │ │ │ ├── demo │ │ │ │ ├── demo_cmmlu_base_ppl.py │ │ │ │ ├── demo_cmmlu_chat_gen.py │ │ │ │ ├── demo_gsm8k_base_gen.py │ │ │ │ ├── demo_gsm8k_chat_gen.py │ │ │ │ ├── demo_math_base_gen.py │ │ │ │ └── demo_math_chat_gen.py │ │ │ ├── dingo │ │ │ │ └── dingo_gen.py │ │ │ ├── drop │ │ │ │ ├── deprecated_drop_gen_8a9ed9.py │ │ │ │ ├── drop_examples.py │ │ │ │ ├── drop_gen.py │ │ │ │ ├── drop_gen_a2697c.py │ │ │ │ ├── drop_gen_eb14af.py │ │ │ │ ├── drop_llm_judge_gen.py │ │ │ │ ├── drop_llmjudge_gen_3857b0.py │ │ │ │ └── drop_openai_simple_evals_gen_3857b0.py │ │ │ ├── ds1000 │ │ │ │ ├── ds1000_compl_gen_cbc84f.py │ │ │ │ ├── ds1000_compl_service_eval_gen_cbc84f.py │ │ │ │ ├── ds1000_gen_5c4bec.py │ │ │ │ ├── ds1000_gen_cbc84f.py │ │ │ │ └── ds1000_service_eval_gen_cbc84f.py │ │ │ ├── eese │ │ │ │ └── eese_llm_judge_gen.py │ │ │ ├── flores │ │ │ │ ├── flores_gen.py │ │ │ │ ├── flores_gen_806ede.py │ │ │ │ └── flores_gen_aad4fd.py │ │ │ ├── game24 │ │ │ │ ├── game24_gen.py │ │ │ │ └── game24_gen_52a460.py │ │ │ ├── gaokao_math │ │ │ │ ├── README.md │ │ │ │ └── gaokao_math_gen_f5fd28.py │ │ │ ├── govrepcrs │ │ │ │ ├── govrepcrs_gen.py │ │ │ │ ├── govrepcrs_gen_aa5eb3.py │ │ │ │ └── govrepcrs_gen_db7930.py │ │ │ ├── gpqa │ │ │ │ ├── README.md │ │ │ │ ├── gpqa_0shot_nocot_gen_772ea0.py │ │ │ │ ├── gpqa_0shot_nocot_genericllmeval_gen_772ea0.py │ │ │ │ ├── gpqa_0shot_nocot_genericllmeval_xml_gen_772ea0.py │ │ │ │ ├── gpqa_0shot_nocot_llmjudge_gen_772ea0.py │ │ │ │ ├── gpqa_cascade_eval_academic.py │ │ │ │ ├── gpqa_cascade_eval_gen_772ea0.py │ │ │ │ ├── gpqa_few_shot_ppl_4b5a83.py │ │ │ │ ├── gpqa_gen.py │ │ │ │ ├── gpqa_gen_015262.py │ │ │ │ ├── gpqa_gen_4baadb.py │ │ │ │ ├── gpqa_llm_judge_gen.py │ │ │ │ ├── gpqa_openai_simple_evals_gen_5aeece.py │ │ │ │ └── gpqa_ppl_6bf57a.py │ │ │ ├── gsm8k │ │ │ │ ├── README.md │ │ │ │ ├── deprecated_gsm8k_agent_gen_be1606.py │ │ │ │ ├── gsm8k_0shot_gen_a58960.py │ │ │ │ ├── gsm8k_0shot_nocot_gen_6cbf22.py │ │ │ │ ├── gsm8k_0shot_v2_gen_17d799.py │ │ │ │ ├── gsm8k_0shot_v2_gen_6e39a4.py │ │ │ │ ├── gsm8k_0shot_v2_gen_a58960.py │ │ │ │ ├── gsm8k_agent_gen_c3dff3.py │ │ │ │ ├── gsm8k_gen.py │ │ │ │ ├── gsm8k_gen_17d0dc.py │ │ │ │ ├── gsm8k_gen_1d7fe4.py │ │ │ │ ├── gsm8k_gen_1dce88.py │ │ │ │ ├── gsm8k_gen_3309bd.py │ │ │ │ ├── gsm8k_gen_57b0b1.py │ │ │ │ ├── gsm8k_gen_701491.py │ │ │ │ ├── gsm8k_gen_a3e34a.py │ │ │ │ ├── gsm8k_gen_d6de81.py │ │ │ │ ├── gsm8k_gen_e9e91e.py │ │ │ │ ├── gsm8k_gen_ee684f.py │ │ │ │ ├── gsm8k_model_postprocess_gen_a58960.py │ │ │ │ └── gsm8k_xfinder_gen_a58960.py │ │ │ ├── gsm8k_contamination │ │ │ │ └── gsm8k_contamination_ppl_ecdd22.py │ │ │ ├── gsm_hard │ │ │ │ ├── gsmhard_gen.py │ │ │ │ └── gsmhard_gen_8a1400.py │ │ │ ├── hellaswag │ │ │ │ ├── README.md │ │ │ │ ├── hellaswag_10shot_gen_e42710.py │ │ │ │ ├── hellaswag_10shot_ppl_59c85e.py │ │ │ │ ├── hellaswag_clean_ppl.py │ │ │ │ ├── hellaswag_gen.py │ │ │ │ ├── hellaswag_gen_6faab5.py │ │ │ │ ├── hellaswag_llm_judge_gen.py │ │ │ │ ├── hellaswag_llmjudge_gen_809ef1.py │ │ │ │ ├── hellaswag_ppl.py │ │ │ │ ├── hellaswag_ppl_47bff9.py │ │ │ │ ├── hellaswag_ppl_7d7f2d.py │ │ │ │ ├── hellaswag_ppl_9dbb12.py │ │ │ │ └── hellaswag_ppl_a6e128.py │ │ │ ├── humaneval │ │ │ │ ├── README.md │ │ │ │ ├── deprecated_humaneval_gen_4a6eef.py │ │ │ │ ├── deprecated_humaneval_gen_6d1cc2.py │ │ │ │ ├── deprecated_humaneval_gen_a82cae.py │ │ │ │ ├── deprecated_humaneval_gen_d2537e.py │ │ │ │ ├── deprecated_humaneval_gen_fd5822.py │ │ │ │ ├── deprecated_humaneval_gen_ff7054.py │ │ │ │ ├── humaneval_gen.py │ │ │ │ ├── humaneval_gen_66a7f4.py │ │ │ │ ├── humaneval_gen_8e312c.py │ │ │ │ ├── humaneval_openai_sample_evals_gen_159614.py │ │ │ │ ├── humaneval_openai_sample_evals_gen_dcae0e.py │ │ │ │ ├── humaneval_openai_sample_evals_o1_gen_5e7b00.py │ │ │ │ ├── humaneval_openai_sample_evals_repeat_gen_dcae0e.py │ │ │ │ ├── humaneval_passk_gen_8e312c.py │ │ │ │ ├── humaneval_repeat10_gen_8e312c.py │ │ │ │ ├── internal_humaneval_gen_ce6b06.py │ │ │ │ └── internal_humaneval_gen_d2537e.py │ │ │ ├── humaneval_cn │ │ │ │ ├── humaneval_cn_gen.py │ │ │ │ ├── humaneval_cn_gen_6313aa.py │ │ │ │ ├── humaneval_cn_passk_gen_6313aa.py │ │ │ │ └── humaneval_cn_repeat10_gen_6313aa.py │ │ │ ├── humaneval_multi │ │ │ │ ├── humaneval_multi_gen.py │ │ │ │ └── humaneval_multi_gen_82cf85.py │ │ │ ├── humaneval_plus │ │ │ │ ├── humaneval_plus_gen.py │ │ │ │ ├── humaneval_plus_gen_66a7f4.py │ │ │ │ ├── humaneval_plus_gen_8e312c.py │ │ │ │ ├── humaneval_plus_openai_simple_evals_gen_159614.py │ │ │ │ ├── humaneval_plus_passk_gen_8e312c.py │ │ │ │ ├── humaneval_plus_repeat10_gen_8e312c.py │ │ │ │ └── humaneval_plus_repeat_gen_41b01c.py │ │ │ ├── humaneval_pro │ │ │ │ ├── README.md │ │ │ │ ├── humaneval_pro_gen.py │ │ │ │ ├── humaneval_pro_gen_3dc067.py │ │ │ │ └── humaneval_pro_repeat_gen_3dc067.py │ │ │ ├── humanevalx │ │ │ │ ├── humanevalx_0shot_nocot_gen_3e4bbd.py │ │ │ │ ├── humanevalx_gen.py │ │ │ │ ├── humanevalx_gen_0af626.py │ │ │ │ ├── humanevalx_gen_3d84a3.py │ │ │ │ ├── humanevalx_gen_620cfa.py │ │ │ │ └── humanevalx_repeat_gen_3d84a3.py │ │ │ ├── hungarian_exam │ │ │ │ ├── hungarian_exam_gen.py │ │ │ │ └── hungarian_exam_gen_8a1435.py │ │ │ ├── inference_ppl │ │ │ │ ├── README.md │ │ │ │ └── inference_ppl.py │ │ │ ├── infinitebench │ │ │ │ ├── infinitebench.py │ │ │ │ ├── infinitebenchcodedebug │ │ │ │ │ ├── infinitebench_codedebug_gen.py │ │ │ │ │ └── infinitebench_codedebug_gen_276a42.py │ │ │ │ ├── infinitebenchcoderun │ │ │ │ │ ├── infinitebench_coderun_gen.py │ │ │ │ │ └── infinitebench_coderun_gen_1a76bd.py │ │ │ │ ├── infinitebenchendia │ │ │ │ │ ├── infinitebench_endia_gen.py │ │ │ │ │ └── infinitebench_endia_gen_c96eb5.py │ │ │ │ ├── infinitebenchenmc │ │ │ │ │ ├── infinitebench_enmc_gen.py │ │ │ │ │ └── infinitebench_enmc_gen_3a4102.py │ │ │ │ ├── infinitebenchenqa │ │ │ │ │ ├── infinitebench_enqa_gen.py │ │ │ │ │ └── infinitebench_enqa_gen_a1640c.py │ │ │ │ ├── infinitebenchensum │ │ │ │ │ ├── infinitebench_ensum_gen.py │ │ │ │ │ └── infinitebench_ensum_gen_cfbc08.py │ │ │ │ ├── infinitebenchmathcalc │ │ │ │ │ ├── infinitebench_mathcalc_gen.py │ │ │ │ │ └── infinitebench_mathcalc_gen_78d17e.py │ │ │ │ ├── infinitebenchmathfind │ │ │ │ │ ├── infinitebench_mathfind_gen.py │ │ │ │ │ └── infinitebench_mathfind_gen_6d799e.py │ │ │ │ ├── infinitebenchretrievekv │ │ │ │ │ ├── infinitebench_retrievekv_gen.py │ │ │ │ │ └── infinitebench_retrievekv_gen_06b3ac.py │ │ │ │ ├── infinitebenchretrievenumber │ │ │ │ │ ├── infinitebench_retrievenumber_gen.py │ │ │ │ │ └── infinitebench_retrievenumber_gen_047436.py │ │ │ │ ├── infinitebenchretrievepasskey │ │ │ │ │ ├── infinitebench_retrievepasskey_gen.py │ │ │ │ │ └── infinitebench_retrievepasskey_gen_62ff68.py │ │ │ │ └── infinitebenchzhqa │ │ │ │ │ ├── infinitebench_zhqa_gen.py │ │ │ │ │ └── infinitebench_zhqa_gen_1e5293.py │ │ │ ├── internsandbox │ │ │ │ ├── internsandbox_gen.py │ │ │ │ └── internsandbox_gen_44b982.py │ │ │ ├── iwslt2017 │ │ │ │ ├── iwslt2017_gen.py │ │ │ │ ├── iwslt2017_gen_69ce16.py │ │ │ │ ├── iwslt2017_gen_b4a814.py │ │ │ │ └── iwslt2017_gen_d0ebd1.py │ │ │ ├── jigsawmultilingual │ │ │ │ ├── jigsawmultilingual_clp.py │ │ │ │ ├── jigsawmultilingual_clp_1af0ae.py │ │ │ │ └── jigsawmultilingual_clp_fe50d8.py │ │ │ ├── judge │ │ │ │ ├── judgebench.py │ │ │ │ ├── judgerbenchv2.py │ │ │ │ ├── rewardbench.py │ │ │ │ └── rmb.py │ │ │ ├── kaoshi │ │ │ │ ├── kaoshi_gen.py │ │ │ │ └── kaoshi_gen_86aca2.py │ │ │ ├── kcle │ │ │ │ └── kcle_llm_judge_gen.py │ │ │ ├── korbench │ │ │ │ ├── korbench_gen.py │ │ │ │ ├── korbench_llm_judge_gen.py │ │ │ │ ├── korbench_llmjudge_gen_17854d.py │ │ │ │ ├── korbench_llmjudge_gen_56cf43.py │ │ │ │ ├── korbench_mixed_gen_d00bdd.py │ │ │ │ ├── korbench_single_0_shot_gen.py │ │ │ │ ├── korbench_single_0shot_cascade_eval_gen_56cf43.py │ │ │ │ ├── korbench_single_0shot_genericllmeval_gen_17854d.py │ │ │ │ ├── korbench_single_0shot_llmjudge_gen.py │ │ │ │ ├── korbench_single_3_shot_gen.py │ │ │ │ └── readme.md │ │ │ ├── lambada │ │ │ │ ├── lambada_gen.py │ │ │ │ ├── lambada_gen_217e11.py │ │ │ │ └── lambada_gen_8b48a5.py │ │ │ ├── lawbench │ │ │ │ ├── lawbench_one_shot_gen_002588.py │ │ │ │ └── lawbench_zero_shot_gen_002588.py │ │ │ ├── lcsts │ │ │ │ ├── lcsts_gen.py │ │ │ │ ├── lcsts_gen_8ee1fe.py │ │ │ │ └── lcsts_gen_9b0b89.py │ │ │ ├── leval │ │ │ │ ├── leval.py │ │ │ │ ├── levalcoursera │ │ │ │ │ ├── leval_coursera_gen.py │ │ │ │ │ └── leval_coursera_gen_36a006.py │ │ │ │ ├── levalfinancialqa │ │ │ │ │ ├── leval_financialqa_gen.py │ │ │ │ │ └── leval_financialqa_gen_b03798.py │ │ │ │ ├── levalgovreportsumm │ │ │ │ │ ├── leval_gov_report_summ_gen.py │ │ │ │ │ └── leval_gov_report_summ_gen_b03798.py │ │ │ │ ├── levalgsm100 │ │ │ │ │ ├── leval_gsm100_gen.py │ │ │ │ │ └── leval_gsm100_gen_77dd94.py │ │ │ │ ├── levallegalcontractqa │ │ │ │ │ ├── leval_legalcontractqa_gen.py │ │ │ │ │ └── leval_legalcontractqa_gen_68a2ac.py │ │ │ │ ├── levalmeetingsumm │ │ │ │ │ ├── leval_meetingsumm_gen.py │ │ │ │ │ └── leval_meetingsumm_gen_b03798.py │ │ │ │ ├── levalmultidocqa │ │ │ │ │ ├── leval_multidocqa_gen.py │ │ │ │ │ └── leval_multidocqa_gen_96bf3f.py │ │ │ │ ├── levalnarrativeqa │ │ │ │ │ ├── leval_narrativeqa_gen.py │ │ │ │ │ └── leval_narrativeqa_gen_766dd0.py │ │ │ │ ├── levalnaturalquestion │ │ │ │ │ ├── leval_naturalquestion_gen.py │ │ │ │ │ └── leval_naturalquestion_gen_52c33f.py │ │ │ │ ├── levalnewssumm │ │ │ │ │ ├── leval_newssumm_gen.py │ │ │ │ │ └── leval_newssumm_gen_b03798.py │ │ │ │ ├── levalpaperassistant │ │ │ │ │ ├── leval_paper_assistant_gen.py │ │ │ │ │ └── leval_paper_assistant_gen_b03798.py │ │ │ │ ├── levalpatentsumm │ │ │ │ │ ├── leval_patent_summ_gen.py │ │ │ │ │ └── leval_patent_summ_gen_b03798.py │ │ │ │ ├── levalquality │ │ │ │ │ ├── leval_quality_gen.py │ │ │ │ │ └── leval_quality_gen_36a006.py │ │ │ │ ├── levalreviewsumm │ │ │ │ │ ├── leval_review_summ_gen.py │ │ │ │ │ └── leval_review_summ_gen_b03798.py │ │ │ │ ├── levalscientificqa │ │ │ │ │ ├── leval_scientificqa_gen.py │ │ │ │ │ └── leval_scientificqa_gen_96bf3f.py │ │ │ │ ├── levaltopicretrieval │ │ │ │ │ ├── leval_topic_retrieval_gen.py │ │ │ │ │ └── leval_topic_retrieval_gen_bf433f.py │ │ │ │ ├── levaltpo │ │ │ │ │ ├── leval_tpo_gen.py │ │ │ │ │ └── leval_tpo_gen_36a006.py │ │ │ │ └── levaltvshowsumm │ │ │ │ │ ├── leval_tvshow_summ_gen.py │ │ │ │ │ └── leval_tvshow_summ_gen_b03798.py │ │ │ ├── livecodebench │ │ │ │ ├── README.md │ │ │ │ ├── livecodebench_code_generation_repeat_gen_b5b6c5.py │ │ │ │ ├── livecodebench_gen.py │ │ │ │ ├── livecodebench_gen_6966bc.py │ │ │ │ ├── livecodebench_gen_a4f90b.py │ │ │ │ ├── livecodebench_gen_b2b0fd.py │ │ │ │ ├── livecodebench_o1_gen_f0ed6c.py │ │ │ │ ├── livecodebench_split_v4_o1_gen_f0ed6c.py │ │ │ │ ├── livecodebench_split_v4_o1_postprocess_gen_f0ed6c.py │ │ │ │ ├── livecodebench_time_split_gen_a4f90b.py │ │ │ │ ├── livecodebench_v1_o1_gen_f0ed6c.py │ │ │ │ └── livecodebench_v6_academic.py │ │ │ ├── livemathbench │ │ │ │ ├── README.md │ │ │ │ ├── livemathbench_gen.py │ │ │ │ ├── livemathbench_gen_6eb711.py │ │ │ │ ├── livemathbench_gen_9befbf.py │ │ │ │ ├── livemathbench_gen_caed8f.py │ │ │ │ ├── livemathbench_greedy_gen.py │ │ │ │ ├── livemathbench_greedy_gen_9befbf.py │ │ │ │ ├── livemathbench_hard_custom_cascade_eval_gen_4bce59.py │ │ │ │ ├── livemathbench_hard_custom_llmverify_gen_85d0ef.py │ │ │ │ ├── livemathbench_hard_gen_353ae7.py │ │ │ │ ├── livemathbench_hard_greedy_gen_353ae7.py │ │ │ │ ├── livemathbench_hard_llmjudge_gen_71eaf5.py │ │ │ │ ├── livemathbench_v202505_gen_9befbf.py │ │ │ │ ├── livemathbench_v202505_greedy_gen_9befbf.py │ │ │ │ ├── livemathbench_v202505_hard_gen_353ae7.py │ │ │ │ └── livemathbench_v202505_hard_greedy_gen_353ae7.py │ │ │ ├── livereasonbench │ │ │ │ ├── livereasonbench_gen.py │ │ │ │ ├── livereasonbench_gen_f990de.py │ │ │ │ ├── livereasonbench_genericllmeval_gen_f990de.py │ │ │ │ └── livereasonbench_llmverify_20250428_gen_0484cb.py │ │ │ ├── livestembench │ │ │ │ ├── livestembench_0shot_noncot_gen_2e6d10.py │ │ │ │ ├── livestembench_0shot_noncot_xml_gen_2e6d10.py │ │ │ │ ├── livestembench_gen.py │ │ │ │ └── livestembench_gen_3e3c50.py │ │ │ ├── llm_compression │ │ │ │ ├── README.md │ │ │ │ └── llm_compression.py │ │ │ ├── longbench │ │ │ │ ├── longbench.py │ │ │ │ ├── longbench2wikimqa │ │ │ │ │ ├── longbench_2wikimqa_gen.py │ │ │ │ │ └── longbench_2wikimqa_gen_6b3efc.py │ │ │ │ ├── longbenchdureader │ │ │ │ │ ├── longbench_dureader_gen.py │ │ │ │ │ └── longbench_dureader_gen_c6c7e4.py │ │ │ │ ├── longbenchgov_report │ │ │ │ │ ├── longbench_gov_report_gen.py │ │ │ │ │ └── longbench_gov_report_gen_54c5b0.py │ │ │ │ ├── longbenchhotpotqa │ │ │ │ │ ├── longbench_hotpotqa_gen.py │ │ │ │ │ └── longbench_hotpotqa_gen_6b3efc.py │ │ │ │ ├── longbenchlcc │ │ │ │ │ ├── longbench_lcc_gen.py │ │ │ │ │ └── longbench_lcc_gen_6ba507.py │ │ │ │ ├── longbenchlsht │ │ │ │ │ ├── longbench_lsht_gen.py │ │ │ │ │ └── longbench_lsht_gen_e8a339.py │ │ │ │ ├── longbenchmulti_news │ │ │ │ │ ├── longbench_multi_news_gen.py │ │ │ │ │ └── longbench_multi_news_gen_6f9da9.py │ │ │ │ ├── longbenchmultifieldqa_en │ │ │ │ │ ├── longbench_multifieldqa_en_gen.py │ │ │ │ │ └── longbench_multifieldqa_en_gen_d3838e.py │ │ │ │ ├── longbenchmultifieldqa_zh │ │ │ │ │ ├── longbench_multifieldqa_zh_gen.py │ │ │ │ │ └── longbench_multifieldqa_zh_gen_e9a7ef.py │ │ │ │ ├── longbenchmusique │ │ │ │ │ ├── longbench_musique_gen.py │ │ │ │ │ └── longbench_musique_gen_6b3efc.py │ │ │ │ ├── longbenchnarrativeqa │ │ │ │ │ ├── longbench_narrativeqa_gen.py │ │ │ │ │ └── longbench_narrativeqa_gen_a68305.py │ │ │ │ ├── longbenchpassage_count │ │ │ │ │ ├── longbench_passage_count_gen.py │ │ │ │ │ └── longbench_passage_count_gen_dcdaab.py │ │ │ │ ├── longbenchpassage_retrieval_en │ │ │ │ │ ├── longbench_passage_retrieval_en_gen.py │ │ │ │ │ └── longbench_passage_retrieval_en_gen_734db5.py │ │ │ │ ├── longbenchpassage_retrieval_zh │ │ │ │ │ ├── longbench_passage_retrieval_zh_gen.py │ │ │ │ │ └── longbench_passage_retrieval_zh_gen_01cca2.py │ │ │ │ ├── longbenchqasper │ │ │ │ │ ├── longbench_qasper_gen.py │ │ │ │ │ └── longbench_qasper_gen_6b3efc.py │ │ │ │ ├── longbenchqmsum │ │ │ │ │ ├── longbench_qmsum_gen.py │ │ │ │ │ └── longbench_qmsum_gen_d33331.py │ │ │ │ ├── longbenchrepobench │ │ │ │ │ ├── longbench_repobench_gen.py │ │ │ │ │ └── longbench_repobench_gen_6df953.py │ │ │ │ ├── longbenchsamsum │ │ │ │ │ ├── longbench_samsum_gen.py │ │ │ │ │ └── longbench_samsum_gen_f4416d.py │ │ │ │ ├── longbenchtrec │ │ │ │ │ ├── longbench_trec_gen.py │ │ │ │ │ └── longbench_trec_gen_824187.py │ │ │ │ ├── longbenchtriviaqa │ │ │ │ │ ├── longbench_triviaqa_gen.py │ │ │ │ │ └── longbench_triviaqa_gen_d30cb9.py │ │ │ │ └── longbenchvcsum │ │ │ │ │ ├── longbench_vcsum_gen.py │ │ │ │ │ └── longbench_vcsum_gen_f7a8ac.py │ │ │ ├── longbenchv2 │ │ │ │ ├── longbenchv2_gen.py │ │ │ │ └── longbenchv2_gen_75fbba.py │ │ │ ├── lveval │ │ │ │ ├── lveval.md │ │ │ │ ├── lveval.py │ │ │ │ ├── lvevalcmrc_mixup │ │ │ │ │ ├── lveval_cmrc_mixup_gen.py │ │ │ │ │ └── lveval_cmrc_mixup_gen_465823.py │ │ │ │ ├── lvevaldureader_mixup │ │ │ │ │ ├── lveval_dureader_mixup_gen.py │ │ │ │ │ └── lveval_dureader_mixup_gen_465823.py │ │ │ │ ├── lvevalfactrecall_en │ │ │ │ │ ├── lveval_factrecall_en_gen.py │ │ │ │ │ └── lveval_factrecall_en_gen_9a836f.py │ │ │ │ ├── lvevalfactrecall_zh │ │ │ │ │ ├── lveval_factrecall_zh_gen.py │ │ │ │ │ └── lveval_factrecall_zh_gen_dbee70.py │ │ │ │ ├── lvevalhotpotwikiqa_mixup │ │ │ │ │ ├── lveval_hotpotwikiqa_mixup_gen.py │ │ │ │ │ └── lveval_hotpotwikiqa_mixup_gen_77ce82.py │ │ │ │ ├── lvevallic_mixup │ │ │ │ │ ├── lveval_lic_mixup_gen.py │ │ │ │ │ └── lveval_lic_mixup_gen_01eb0c.py │ │ │ │ ├── lvevalloogle_CR_mixup │ │ │ │ │ ├── lveval_loogle_CR_mixup_gen.py │ │ │ │ │ └── lveval_loogle_CR_mixup_gen_d7ea36.py │ │ │ │ ├── lvevalloogle_MIR_mixup │ │ │ │ │ ├── lveval_loogle_MIR_mixup_gen.py │ │ │ │ │ └── lveval_loogle_MIR_mixup_gen_d7ea36.py │ │ │ │ ├── lvevalloogle_SD_mixup │ │ │ │ │ ├── lveval_loogle_SD_mixup_gen.py │ │ │ │ │ └── lveval_loogle_SD_mixup_gen_d7ea36.py │ │ │ │ ├── lvevalmultifieldqa_en_mixup │ │ │ │ │ ├── lveval_multifieldqa_en_mixup_gen.py │ │ │ │ │ └── lveval_multifieldqa_en_mixup_gen_d7ea36.py │ │ │ │ └── lvevalmultifieldqa_zh_mixup │ │ │ │ │ ├── lveval_multifieldqa_zh_mixup_gen.py │ │ │ │ │ └── lveval_multifieldqa_zh_mixup_gen_0fbdad.py │ │ │ ├── mastermath2024v1 │ │ │ │ ├── mastermath2024v1_gen.py │ │ │ │ └── mastermath2024v1_gen_be6318.py │ │ │ ├── matbench │ │ │ │ ├── matbench_gen.py │ │ │ │ ├── matbench_gen_f71840.py │ │ │ │ ├── matbench_llm_judge_gen_0e9276.py │ │ │ │ └── matbench_regex_judge_gen_0e9276.py │ │ │ ├── math │ │ │ │ ├── README.md │ │ │ │ ├── deprecated_math_agent_evaluatorv2_gen_861b4f.py │ │ │ │ ├── deprecated_math_evaluatorv2_gen_265cce.py │ │ │ │ ├── math_0shot_gen_11c4b5.py │ │ │ │ ├── math_0shot_gen_393424.py │ │ │ │ ├── math_0shot_llm_judge_gen_393424.py │ │ │ │ ├── math_0shot_llm_judge_v2_gen_31d777.py │ │ │ │ ├── math_4shot_base_gen_43d5b6.py │ │ │ │ ├── math_4shot_base_gen_db136b.py │ │ │ │ ├── math_4shot_example_from_google_research.py │ │ │ │ ├── math_500_cascade_eval_gen_6ff468.py │ │ │ │ ├── math_500_gen.py │ │ │ │ ├── math_500_llmjudge_gen_6ff468.py │ │ │ │ ├── math_agent_evaluatorv2_gen_0c1b4e.py │ │ │ │ ├── math_agent_gen_0c1b4e.py │ │ │ │ ├── math_agent_gen_861b4f.py │ │ │ │ ├── math_agent_gen_af2293.py │ │ │ │ ├── math_evaluatorv2_gen_2f4a71.py │ │ │ │ ├── math_evaluatorv2_gen_cecb31.py │ │ │ │ ├── math_gen.py │ │ │ │ ├── math_gen_0957ff.py │ │ │ │ ├── math_gen_1ed9c2.py │ │ │ │ ├── math_gen_265cce.py │ │ │ │ ├── math_gen_559593.py │ │ │ │ ├── math_gen_5e8458.py │ │ │ │ ├── math_gen_736506.py │ │ │ │ ├── math_gen_78ced2.py │ │ │ │ ├── math_gen_943d32.py │ │ │ │ ├── math_gen_a58d9d.py │ │ │ │ ├── math_intern_evaluator_gen_265cce.py │ │ │ │ ├── math_llm_judge_gen.py │ │ │ │ ├── math_llm_judge_gen_56606f.py │ │ │ │ ├── math_prm800k_500_0shot_cot_academic_gen.py │ │ │ │ ├── math_prm800k_500_0shot_cot_gen.py │ │ │ │ ├── math_prm800k_500_0shot_cot_gen_11c4b5.py │ │ │ │ ├── math_prm800k_500_0shot_nocot_gen_b27274.py │ │ │ │ ├── math_prm800k_500_0shot_nocot_genericllmeval_gen_63a000.py │ │ │ │ ├── math_prm800k_500_0shot_nocot_genericllmeval_gen_6ff468.py │ │ │ │ ├── math_prm800k_500_0shot_nocot_genericllmeval_xml_gen_63a000.py │ │ │ │ ├── math_prm800k_500_0shot_nocot_llmjudge_gen_63a000.py │ │ │ │ ├── math_prm800k_500_gen.py │ │ │ │ ├── math_prm800k_500_gen_393424.py │ │ │ │ ├── math_prm800k_500_llm_judge_gen.py │ │ │ │ ├── math_prm800k_500_llmverify_gen_6ff468.py │ │ │ │ └── math_prm800k_500_llmverify_repeat4_gen_97b203.py │ │ │ ├── math401 │ │ │ │ ├── math401_gen.py │ │ │ │ └── math401_gen_ab5f39.py │ │ │ ├── mbpp │ │ │ │ ├── README.md │ │ │ │ ├── deprecated_mbpp_gen_1e1056.py │ │ │ │ ├── deprecated_mbpp_gen_6590b0.py │ │ │ │ ├── deprecated_mbpp_gen_caa7ab.py │ │ │ │ ├── deprecated_mbpp_passk_gen_1e1056.py │ │ │ │ ├── deprecated_mbpp_repeat10_gen_1e1056.py │ │ │ │ ├── deprecated_sanitized_mbpp_gen_1e1056.py │ │ │ │ ├── deprecated_sanitized_mbpp_gen_cb43ef.py │ │ │ │ ├── deprecated_sanitized_mbpp_passk_gen_1e1056.py │ │ │ │ ├── deprecated_sanitized_mbpp_repeat10_gen_1e1056.py │ │ │ │ ├── mbpp_gen.py │ │ │ │ ├── mbpp_gen_830460.py │ │ │ │ ├── mbpp_passk_gen_830460.py │ │ │ │ ├── mbpp_repeat10_gen_830460.py │ │ │ │ ├── mbpp_repeat_gen_18dd1b.py │ │ │ │ ├── sanitized_mbpp_gen_742f0c.py │ │ │ │ ├── sanitized_mbpp_gen_830460.py │ │ │ │ ├── sanitized_mbpp_gen_a0fc46.py │ │ │ │ ├── sanitized_mbpp_mdblock_0shot_nocot_gen_a2e416.py │ │ │ │ ├── sanitized_mbpp_mdblock_gen_a447ff.py │ │ │ │ ├── sanitized_mbpp_passk_gen_830460.py │ │ │ │ └── sanitized_mbpp_repeat10_gen_830460.py │ │ │ ├── mbpp_cn │ │ │ │ ├── deprecated_mbpp_cn_gen_1d1481.py │ │ │ │ ├── deprecated_mbpp_cn_passk_gen_1d1481.py │ │ │ │ ├── deprecated_mbpp_cn_repeat10_gen_1d1481.py │ │ │ │ ├── mbpp_cn_gen.py │ │ │ │ └── mbpp_cn_gen_9114d5.py │ │ │ ├── mbpp_plus │ │ │ │ ├── deprecated_mbpp_plus_gen_94815c.py │ │ │ │ ├── mbpp_plus_gen.py │ │ │ │ └── mbpp_plus_gen_0b836a.py │ │ │ ├── mbpp_pro │ │ │ │ ├── README.md │ │ │ │ ├── mbpp_pro_gen.py │ │ │ │ ├── mbpp_pro_gen_3dc067.py │ │ │ │ └── mbpp_pro_repeat_gen_3dc067.py │ │ │ ├── medmcqa │ │ │ │ ├── medmcqa_gen.py │ │ │ │ ├── medmcqa_gen_60c8f5.py │ │ │ │ ├── medmcqa_llmjudge_gen.py │ │ │ │ └── medmcqa_llmjudge_gen_60c8f5.py │ │ │ ├── mgsm │ │ │ │ ├── README.md │ │ │ │ ├── mgsm_gen.py │ │ │ │ └── mgsm_gen_d967bc.py │ │ │ ├── mmlu │ │ │ │ ├── README.md │ │ │ │ ├── mmlu_all_sets.py │ │ │ │ ├── mmlu_clean_ppl.py │ │ │ │ ├── mmlu_gen.py │ │ │ │ ├── mmlu_gen_23a9a9.py │ │ │ │ ├── mmlu_gen_4d595a.py │ │ │ │ ├── mmlu_gen_5d1409.py │ │ │ │ ├── mmlu_gen_79e572.py │ │ │ │ ├── mmlu_gen_a484b3.py │ │ │ │ ├── mmlu_llm_judge_gen.py │ │ │ │ ├── mmlu_llmjudge_gen_f4336b.py │ │ │ │ ├── mmlu_model_postprocess_gen_4d595a.py │ │ │ │ ├── mmlu_openai_0shot_nocot_llmjudge_gen_216503.py │ │ │ │ ├── mmlu_openai_simple_evals_gen_b618ea.py │ │ │ │ ├── mmlu_ppl.py │ │ │ │ ├── mmlu_ppl_ac766d.py │ │ │ │ ├── mmlu_stem_0shot_cascade_eval_gen_216503.py │ │ │ │ ├── mmlu_stem_0shot_gen_216503.py │ │ │ │ ├── mmlu_stem_0shot_xml_gen_216503.py │ │ │ │ ├── mmlu_stem_sets.py │ │ │ │ ├── mmlu_xfinder_gen_4d595a.py │ │ │ │ └── mmlu_zero_shot_gen_47e2c0.py │ │ │ ├── mmlu_cf │ │ │ │ ├── mmlu_cf_categories.py │ │ │ │ ├── mmlu_cf_few_shot.py │ │ │ │ ├── mmlu_cf_gen.py │ │ │ │ ├── mmlu_cf_gen_040615.py │ │ │ │ └── mmlu_cf_zero_shot.py │ │ │ ├── mmlu_pro │ │ │ │ ├── mmlu_pro_0shot_cot_gen_08c1de.py │ │ │ │ ├── mmlu_pro_0shot_nocot_genericllmeval_gen_08c1de.py │ │ │ │ ├── mmlu_pro_biomed_0shot_cot_gen_057927.py │ │ │ │ ├── mmlu_pro_biomed_0shot_nocot_genericllmeval_gen_057927.py │ │ │ │ ├── mmlu_pro_categories.py │ │ │ │ ├── mmlu_pro_few_shot_gen_bfaf90.py │ │ │ │ ├── mmlu_pro_gen.py │ │ │ │ ├── mmlu_pro_gen_cdbebf.py │ │ │ │ └── mmlu_pro_llm_judge_gen.py │ │ │ ├── mmmlu │ │ │ │ ├── README.md │ │ │ │ ├── mmmlu_5_shot_gen_bcbeb3.py │ │ │ │ ├── mmmlu_gen.py │ │ │ │ ├── mmmlu_gen_c51a84.py │ │ │ │ └── mmmlu_prompt.py │ │ │ ├── mmmlu_lite │ │ │ │ ├── README.md │ │ │ │ ├── mmmlu_lite_gen.py │ │ │ │ └── mmmlu_lite_gen_c51a84.py │ │ │ ├── multipl_e │ │ │ │ ├── multiple_gen.py │ │ │ │ ├── multiple_top_ten_gen_f44aaf.py │ │ │ │ └── multiple_top_ten_repeat_gen_0cd6ce.py │ │ │ ├── musr │ │ │ │ ├── README.md │ │ │ │ ├── musr_gen.py │ │ │ │ ├── musr_gen_3622bb.py │ │ │ │ ├── musr_gen_3c6e15.py │ │ │ │ ├── musr_gen_b47fd3.py │ │ │ │ ├── musr_llm_judge_gen.py │ │ │ │ └── musr_llmjudge_gen_b47fd3.py │ │ │ ├── narrativeqa │ │ │ │ ├── narrativeqa_gen.py │ │ │ │ ├── narrativeqa_gen_a2d88a.py │ │ │ │ └── narrativeqa_gen_db6413.py │ │ │ ├── needlebench │ │ │ │ ├── atc │ │ │ │ │ ├── atc.py │ │ │ │ │ ├── atc_choice.py │ │ │ │ │ ├── atc_choice_20.py │ │ │ │ │ ├── atc_choice_50.py │ │ │ │ │ ├── atc_choice_50_en_reasoning.py │ │ │ │ │ ├── atc_choice_80.py │ │ │ │ │ └── atc_choice_80_en_reasoning.py │ │ │ │ ├── needlebench_1000k │ │ │ │ │ ├── needlebench_1000k.py │ │ │ │ │ ├── needlebench_multi_reasoning_1000k.py │ │ │ │ │ ├── needlebench_multi_retrieval_1000k.py │ │ │ │ │ └── needlebench_single_1000k.py │ │ │ │ ├── needlebench_128k │ │ │ │ │ ├── needlebench_128k.py │ │ │ │ │ ├── needlebench_multi_reasoning_128k.py │ │ │ │ │ ├── needlebench_multi_retrieval_128k.py │ │ │ │ │ └── needlebench_single_128k.py │ │ │ │ ├── needlebench_200k │ │ │ │ │ ├── needlebench_200k.py │ │ │ │ │ ├── needlebench_multi_reasoning_200k.py │ │ │ │ │ ├── needlebench_multi_retrieval_200k.py │ │ │ │ │ └── needlebench_single_200k.py │ │ │ │ ├── needlebench_256k │ │ │ │ │ ├── needlebench_256k.py │ │ │ │ │ ├── needlebench_multi_reasoning_256k.py │ │ │ │ │ ├── needlebench_multi_retrieval_256k.py │ │ │ │ │ └── needlebench_single_256k.py │ │ │ │ ├── needlebench_32k │ │ │ │ │ ├── needlebench_32k.py │ │ │ │ │ ├── needlebench_multi_reasoning_32k.py │ │ │ │ │ ├── needlebench_multi_retrieval_32k.py │ │ │ │ │ └── needlebench_single_32k.py │ │ │ │ ├── needlebench_4k │ │ │ │ │ ├── needlebench_4k.py │ │ │ │ │ ├── needlebench_multi_reasoning_4k.py │ │ │ │ │ ├── needlebench_multi_retrieval_4k.py │ │ │ │ │ └── needlebench_single_4k.py │ │ │ │ ├── needlebench_8k │ │ │ │ │ ├── needlebench_8k.py │ │ │ │ │ ├── needlebench_multi_reasoning_8k.py │ │ │ │ │ ├── needlebench_multi_retrieval_8k.py │ │ │ │ │ ├── needlebench_multi_retrieval_compare_batch_8k.py │ │ │ │ │ └── needlebench_single_8k.py │ │ │ │ ├── needlebench_base │ │ │ │ │ ├── needlebench_base_gen.py │ │ │ │ │ └── needlebench_single.py │ │ │ │ ├── readme.md │ │ │ │ └── readme_zh-CN.md │ │ │ ├── needlebench_v2 │ │ │ │ ├── atc │ │ │ │ │ └── atc_0shot_nocot_2_power_en.py │ │ │ │ ├── needlebench_v2_1000k │ │ │ │ │ ├── needlebench_v2_1000k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_1000k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_1000k.py │ │ │ │ │ └── needlebench_v2_single_1000k.py │ │ │ │ ├── needlebench_v2_128k │ │ │ │ │ ├── needlebench_v2_128k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_128k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_128k.py │ │ │ │ │ └── needlebench_v2_single_128k.py │ │ │ │ ├── needlebench_v2_200k │ │ │ │ │ ├── needlebench_v2_200k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_200k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_200k.py │ │ │ │ │ └── needlebench_v2_single_200k.py │ │ │ │ ├── needlebench_v2_256k │ │ │ │ │ ├── needlebench_v2_256k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_256k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_256k.py │ │ │ │ │ └── needlebench_v2_single_256k.py │ │ │ │ ├── needlebench_v2_32k │ │ │ │ │ ├── needlebench_v2_32k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_32k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_32k.py │ │ │ │ │ └── needlebench_v2_single_32k.py │ │ │ │ ├── needlebench_v2_4k │ │ │ │ │ ├── needlebench_v2_4k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_4k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_4k.py │ │ │ │ │ └── needlebench_v2_single_4k.py │ │ │ │ ├── needlebench_v2_8k │ │ │ │ │ ├── needlebench_v2_8k.py │ │ │ │ │ ├── needlebench_v2_multi_reasoning_8k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_8k.py │ │ │ │ │ ├── needlebench_v2_multi_retrieval_compare_batch_8k.py │ │ │ │ │ └── needlebench_v2_single_8k.py │ │ │ │ ├── readme.md │ │ │ │ └── readme_zh-CN.md │ │ │ ├── nejm_ai_benchmark │ │ │ │ ├── nejmaibench_gen.py │ │ │ │ ├── nejmaibench_gen_60c8f5.py │ │ │ │ ├── nejmaibench_llmjudge_gen.py │ │ │ │ └── nejmaibench_llmjudge_gen_60c8f5.py │ │ │ ├── nq │ │ │ │ ├── README.md │ │ │ │ ├── nq_gen.py │ │ │ │ ├── nq_gen_0356ec.py │ │ │ │ ├── nq_gen_2463e2.py │ │ │ │ ├── nq_gen_3dcea1.py │ │ │ │ ├── nq_gen_68c1c6.py │ │ │ │ ├── nq_gen_c788f6.py │ │ │ │ ├── nq_open_1shot_gen_01cf41.py │ │ │ │ ├── nq_open_1shot_gen_20a989.py │ │ │ │ ├── nq_open_1shot_gen_2e45e5.py │ │ │ │ ├── nq_open_gen_e93f8a.py │ │ │ │ └── nq_xfinder_gen_3dcea1.py │ │ │ ├── nq_cn │ │ │ │ ├── nqcn_gen.py │ │ │ │ └── nqcn_gen_141737.py │ │ │ ├── obqa │ │ │ │ ├── obqa_gen.py │ │ │ │ ├── obqa_gen_9069e4.py │ │ │ │ ├── obqa_ppl.py │ │ │ │ ├── obqa_ppl_1defe8.py │ │ │ │ ├── obqa_ppl_6aac9e.py │ │ │ │ └── obqa_ppl_c7c154.py │ │ │ ├── omni_math │ │ │ │ ├── README.md │ │ │ │ ├── omni_math_cascade_eval_gen_ccf9c0.py │ │ │ │ ├── omni_math_gen.py │ │ │ │ ├── omni_math_gen_18cc08.py │ │ │ │ └── omni_math_llmverify_gen_ccf9c0.py │ │ │ ├── piqa │ │ │ │ ├── piqa_gen.py │ │ │ │ ├── piqa_gen_1194eb.py │ │ │ │ ├── piqa_ppl.py │ │ │ │ ├── piqa_ppl_0cfff2.py │ │ │ │ ├── piqa_ppl_1cf9f0.py │ │ │ │ └── piqa_ppl_3431ea.py │ │ │ ├── promptbench │ │ │ │ ├── promptbench_iwslt2017_gen_cbb8c8.py │ │ │ │ ├── promptbench_math_gen_abf776.py │ │ │ │ ├── promptbench_squad20_gen_b15d1c.py │ │ │ │ └── promptbench_wnli_gen_50662f.py │ │ │ ├── py150 │ │ │ │ ├── py150_gen.py │ │ │ │ └── py150_gen_38b13d.py │ │ │ ├── qabench │ │ │ │ ├── qabench_gen.py │ │ │ │ └── qabench_gen_353ae7.py │ │ │ ├── qasper │ │ │ │ ├── qasper_gen.py │ │ │ │ ├── qasper_gen_a2d88a.py │ │ │ │ └── qasper_gen_db6413.py │ │ │ ├── qaspercut │ │ │ │ ├── qaspercut_gen.py │ │ │ │ ├── qaspercut_gen_a2d88a.py │ │ │ │ └── qaspercut_gen_db6413.py │ │ │ ├── race │ │ │ │ ├── README.md │ │ │ │ ├── race_cot_gen_d95929.py │ │ │ │ ├── race_few_shot_gen_a498ed.py │ │ │ │ ├── race_few_shot_ppl.py │ │ │ │ ├── race_gen.py │ │ │ │ ├── race_gen_69ee4f.py │ │ │ │ ├── race_gen_9302a5.py │ │ │ │ ├── race_ppl.py │ │ │ │ ├── race_ppl_5831a0.py │ │ │ │ ├── race_ppl_a138cd.py │ │ │ │ └── race_ppl_abed12.py │ │ │ ├── realtoxicprompts │ │ │ │ ├── realtoxicprompts_gen.py │ │ │ │ ├── realtoxicprompts_gen_7605e4.py │ │ │ │ └── realtoxicprompts_gen_ac723c.py │ │ │ ├── rolebench │ │ │ │ ├── instruction_generalization_eng.py │ │ │ │ ├── instruction_generalization_zh.py │ │ │ │ └── role_generalization_eng.py │ │ │ ├── ruler │ │ │ │ ├── README.md │ │ │ │ ├── ruler_128k_gen.py │ │ │ │ ├── ruler_16k_gen.py │ │ │ │ ├── ruler_1m_gen.py │ │ │ │ ├── ruler_256k_gen.py │ │ │ │ ├── ruler_32k_gen.py │ │ │ │ ├── ruler_4k_gen.py │ │ │ │ ├── ruler_512k_gen.py │ │ │ │ ├── ruler_64k_gen.py │ │ │ │ ├── ruler_8k_gen.py │ │ │ │ ├── ruler_combined_gen.py │ │ │ │ ├── ruler_cwe_gen.py │ │ │ │ ├── ruler_fwe_gen.py │ │ │ │ ├── ruler_niah_gen.py │ │ │ │ ├── ruler_qa_gen.py │ │ │ │ └── ruler_vt_gen.py │ │ │ ├── s3eval │ │ │ │ ├── s3eval.md │ │ │ │ ├── s3eval_gen.py │ │ │ │ └── s3eval_gen_b8ac80.py │ │ │ ├── safety │ │ │ │ ├── safety_gen.py │ │ │ │ └── safety_gen_7ce197.py │ │ │ ├── sage │ │ │ │ ├── README.md │ │ │ │ ├── sage_gen.py │ │ │ │ └── sage_val_gen_906a48.py │ │ │ ├── scibench │ │ │ │ ├── scibench_gen.py │ │ │ │ └── scibench_gen_2b21f3.py │ │ │ ├── scicode │ │ │ │ ├── README.md │ │ │ │ ├── scicode_gen.py │ │ │ │ ├── scicode_gen_085b98.py │ │ │ │ ├── scicode_gen_62c139.py │ │ │ │ └── scicode_wbg_gen_085b98.py │ │ │ ├── siqa │ │ │ │ ├── siqa_gen.py │ │ │ │ ├── siqa_gen_18632c.py │ │ │ │ ├── siqa_gen_e78df3.py │ │ │ │ ├── siqa_ppl.py │ │ │ │ ├── siqa_ppl_42bc6e.py │ │ │ │ ├── siqa_ppl_7845b0.py │ │ │ │ ├── siqa_ppl_ced5f6.py │ │ │ │ └── siqa_ppl_e8d8c5.py │ │ │ ├── squad20 │ │ │ │ ├── squad20_gen.py │ │ │ │ └── squad20_gen_1710bc.py │ │ │ ├── srbench │ │ │ │ └── srbench_gen.py │ │ │ ├── storycloze │ │ │ │ ├── storycloze_gen.py │ │ │ │ ├── storycloze_gen_7f656a.py │ │ │ │ ├── storycloze_ppl.py │ │ │ │ ├── storycloze_ppl_496661.py │ │ │ │ └── storycloze_ppl_afd16f.py │ │ │ ├── strategyqa │ │ │ │ ├── strategyqa_gen.py │ │ │ │ ├── strategyqa_gen_1180a7.py │ │ │ │ └── strategyqa_gen_934441.py │ │ │ ├── subjective │ │ │ │ ├── alignbench │ │ │ │ │ ├── alignbench_judgeby_critiquellm.py │ │ │ │ │ ├── alignbench_judgeby_critiquellm_new.py │ │ │ │ │ ├── alignbench_v1_1_judgeby_critiquellm.py │ │ │ │ │ └── alignbench_v1_1_judgeby_critiquellm_new.py │ │ │ │ ├── alpaca_eval │ │ │ │ │ ├── alpacav2_judgeby_gpt4.py │ │ │ │ │ ├── alpacav2_judgeby_gpt4_bradleyterry.py │ │ │ │ │ └── alpacav2_judgeby_gpt4_new.py │ │ │ │ ├── arena_hard │ │ │ │ │ ├── README.md │ │ │ │ │ ├── arena_hard_compare.py │ │ │ │ │ ├── arena_hard_compare_bradleyterry.py │ │ │ │ │ └── arena_hard_compare_new.py │ │ │ │ ├── compass_arena_subjective_bench │ │ │ │ │ ├── README_pairwise_bt.md │ │ │ │ │ ├── multiturn │ │ │ │ │ │ ├── pairwise_bt_judge.py │ │ │ │ │ │ ├── pairwise_judge.py │ │ │ │ │ │ └── pointwise_judge.py │ │ │ │ │ └── singleturn │ │ │ │ │ │ ├── pairwise_bt_judge.py │ │ │ │ │ │ ├── pairwise_judge.py │ │ │ │ │ │ └── pointwise_judge.py │ │ │ │ ├── compassarena │ │ │ │ │ ├── compassarena_compare.py │ │ │ │ │ ├── compassarena_compare_bradleyterry.py │ │ │ │ │ └── compassarena_compare_new.py │ │ │ │ ├── compassbench │ │ │ │ │ ├── compassbench_checklist.py │ │ │ │ │ ├── compassbench_compare.py │ │ │ │ │ ├── compassbench_compare_v11.py │ │ │ │ │ ├── compassbench_compare_v11_patch.py │ │ │ │ │ └── compassbench_compare_v12.py │ │ │ │ ├── flames │ │ │ │ │ ├── README.md │ │ │ │ │ ├── flames_gen.py │ │ │ │ │ └── flames_gen_1a58bb.py │ │ │ │ ├── fofo │ │ │ │ │ ├── README.md │ │ │ │ │ ├── fofo_bilingual_judge.py │ │ │ │ │ ├── fofo_bilingual_judge_new.py │ │ │ │ │ ├── fofo_judge.py │ │ │ │ │ └── fofo_judge_new.py │ │ │ │ ├── followbench │ │ │ │ │ ├── followbench_llmeval.py │ │ │ │ │ └── followbench_llmeval_new.py │ │ │ │ ├── hellobench │ │ │ │ │ ├── README.md │ │ │ │ │ └── hellobench.py │ │ │ │ ├── judgerbench │ │ │ │ │ └── judgerbench.py │ │ │ │ ├── multiround │ │ │ │ │ ├── mtbench101_judge.py │ │ │ │ │ ├── mtbench101_judge_new.py │ │ │ │ │ ├── mtbench_single_judge_diff_temp.py │ │ │ │ │ └── mtbench_single_judge_diff_temp_new.py │ │ │ │ ├── wildbench │ │ │ │ │ ├── wildbench.md │ │ │ │ │ ├── wildbench_pair_judge.py │ │ │ │ │ ├── wildbench_pair_judge_bradleyterry.py │ │ │ │ │ └── wildbench_pair_judge_new.py │ │ │ │ └── writingbench │ │ │ │ │ └── writingbench_judge.py │ │ │ ├── summedits │ │ │ │ ├── summedits_gen.py │ │ │ │ ├── summedits_gen_315438.py │ │ │ │ ├── summedits_gen_4fb38b.py │ │ │ │ ├── summedits_ppl.py │ │ │ │ ├── summedits_ppl_1fbeb6.py │ │ │ │ ├── summedits_ppl_3c30d0.py │ │ │ │ └── summedits_ppl_fa58ba.py │ │ │ ├── summscreen │ │ │ │ ├── summscreen_gen.py │ │ │ │ ├── summscreen_gen_653185.py │ │ │ │ └── summscreen_gen_aa5eb3.py │ │ │ ├── supergpqa │ │ │ │ ├── supergpqa_cascade_gen_1545c1.py │ │ │ │ ├── supergpqa_gen.py │ │ │ │ ├── supergpqa_llmjudge_field_gen_1545c1.py │ │ │ │ └── supergpqa_llmjudge_gen_12b8bc.py │ │ │ ├── taco │ │ │ │ ├── README.md │ │ │ │ ├── taco_gen.py │ │ │ │ ├── taco_gen_c7893a.py │ │ │ │ └── taco_levels_gen_411572.py │ │ │ ├── teval │ │ │ │ ├── README.md │ │ │ │ ├── teval_en_gen.py │ │ │ │ ├── teval_en_gen_1ac254.py │ │ │ │ ├── teval_zh_gen.py │ │ │ │ └── teval_zh_gen_1ac254.py │ │ │ ├── triviaqa │ │ │ │ ├── README.md │ │ │ │ ├── triviaqa_gen.py │ │ │ │ ├── triviaqa_gen_0356ec.py │ │ │ │ ├── triviaqa_gen_2121ce.py │ │ │ │ ├── triviaqa_gen_3e39a5.py │ │ │ │ ├── triviaqa_gen_429db5.py │ │ │ │ ├── triviaqa_gen_d297bb.py │ │ │ │ ├── triviaqa_wiki_1shot_gen_20a989.py │ │ │ │ ├── triviaqa_wiki_1shot_gen_bc5f21.py │ │ │ │ ├── triviaqa_wiki_1shot_gen_c87d61.py │ │ │ │ ├── triviaqa_wiki_1shot_gen_eaf81e.py │ │ │ │ └── triviaqa_wiki_gen_d18bf4.py │ │ │ ├── triviaqarc │ │ │ │ ├── triviaqarc_gen.py │ │ │ │ ├── triviaqarc_gen_a2d88a.py │ │ │ │ └── triviaqarc_gen_db6413.py │ │ │ ├── truthfulqa │ │ │ │ ├── truthfulqa_gen.py │ │ │ │ ├── truthfulqa_gen_1e7d8d.py │ │ │ │ └── truthfulqa_gen_5ddc62.py │ │ │ ├── tydiqa │ │ │ │ ├── tydiqa_gen.py │ │ │ │ └── tydiqa_gen_978d2a.py │ │ │ ├── wikibench │ │ │ │ ├── wikibench_few_shot_ppl_c23d79.py │ │ │ │ ├── wikibench_gen.py │ │ │ │ ├── wikibench_gen_0978ad.py │ │ │ │ └── wikibench_gen_f96ece.py │ │ │ ├── wikitext │ │ │ │ ├── wikitext_103_raw_ppl.py │ │ │ │ ├── wikitext_103_raw_ppl_752e2a.py │ │ │ │ ├── wikitext_2_raw_ppl.py │ │ │ │ └── wikitext_2_raw_ppl_752e2a.py │ │ │ ├── winograd │ │ │ │ ├── winograd_ppl.py │ │ │ │ ├── winograd_ppl_8f3049.py │ │ │ │ └── winograd_ppl_b6c7ed.py │ │ │ ├── winogrande │ │ │ │ ├── README.md │ │ │ │ ├── deprecated_winogrande_gen_a9ede5.py │ │ │ │ ├── winogrande_5shot_gen_6447e6.py │ │ │ │ ├── winogrande_5shot_gen_b36770.py │ │ │ │ ├── winogrande_5shot_ll_252f01.py │ │ │ │ ├── winogrande_gen.py │ │ │ │ ├── winogrande_gen_458220.py │ │ │ │ ├── winogrande_gen_a027b6.py │ │ │ │ ├── winogrande_ll.py │ │ │ │ ├── winogrande_ll_c5cf57.py │ │ │ │ ├── winogrande_ppl_55a66e.py │ │ │ │ └── winogrande_ppl_9307fd.py │ │ │ └── xiezhi │ │ │ │ ├── xiezhi_gen.py │ │ │ │ ├── xiezhi_gen_b86cf5.py │ │ │ │ ├── xiezhi_ppl.py │ │ │ │ └── xiezhi_ppl_ea6bd7.py │ │ ├── models │ │ │ ├── accessory │ │ │ │ ├── accessory_llama2_7b.py │ │ │ │ ├── accessory_mixtral_8x7b.py │ │ │ │ └── accessory_sphinx_v2_1k.py │ │ │ ├── alaya │ │ │ │ └── alaya.py │ │ │ ├── aquila │ │ │ │ ├── hf_aquila2_34b.py │ │ │ │ ├── hf_aquila2_7b.py │ │ │ │ ├── hf_aquilachat2_34b.py │ │ │ │ ├── hf_aquilachat2_34b_16k.py │ │ │ │ ├── hf_aquilachat2_7b.py │ │ │ │ └── hf_aquilachat2_7b_16k.py │ │ │ ├── baichuan │ │ │ │ ├── hf_baichuan2_13b_base.py │ │ │ │ ├── hf_baichuan2_13b_chat.py │ │ │ │ ├── hf_baichuan2_7b_base.py │ │ │ │ ├── hf_baichuan2_7b_chat.py │ │ │ │ ├── hf_baichuan_13b_base.py │ │ │ │ ├── hf_baichuan_13b_chat.py │ │ │ │ ├── hf_baichuan_7b.py │ │ │ │ ├── hf_baichuan_m1_14b_base.py │ │ │ │ └── hf_baichuan_m1_14b_instruct.py │ │ │ ├── bailing_api │ │ │ │ ├── bailing-lite-1116.py │ │ │ │ └── bailing-pro-1120.py │ │ │ ├── bluelm │ │ │ │ ├── bluelm_3b.py │ │ │ │ ├── hf_bluelm_7b_base.py │ │ │ │ ├── hf_bluelm_7b_base_32k.py │ │ │ │ ├── hf_bluelm_7b_chat.py │ │ │ │ └── hf_bluelm_7b_chat_32k.py │ │ │ ├── chatglm │ │ │ │ ├── hf_chatglm2_6b.py │ │ │ │ ├── hf_chatglm3_6b.py │ │ │ │ ├── hf_chatglm3_6b_32k.py │ │ │ │ ├── hf_chatglm3_6b_base.py │ │ │ │ ├── hf_chatglm_6b.py │ │ │ │ ├── hf_glm4_9b.py │ │ │ │ ├── hf_glm4_9b_chat.py │ │ │ │ ├── lmdeploy_glm4_9b.py │ │ │ │ ├── lmdeploy_glm4_9b_chat.py │ │ │ │ ├── vllm_chatglm3_6b.py │ │ │ │ ├── vllm_chatglm3_6b_32k.py │ │ │ │ └── vllm_glm4_9b_chat.py │ │ │ ├── claude │ │ │ │ ├── claude.py │ │ │ │ └── claude2.py │ │ │ ├── codegeex2 │ │ │ │ └── hf_codegeex2_6b.py │ │ │ ├── codellama │ │ │ │ ├── hf_codellama_13b.py │ │ │ │ ├── hf_codellama_13b_instruct.py │ │ │ │ ├── hf_codellama_13b_python.py │ │ │ │ ├── hf_codellama_34b.py │ │ │ │ ├── hf_codellama_34b_instruct.py │ │ │ │ ├── hf_codellama_34b_python.py │ │ │ │ ├── hf_codellama_70b.py │ │ │ │ ├── hf_codellama_70b_instruct.py │ │ │ │ ├── hf_codellama_70b_python.py │ │ │ │ ├── hf_codellama_7b.py │ │ │ │ ├── hf_codellama_7b_instruct.py │ │ │ │ └── hf_codellama_7b_python.py │ │ │ ├── deepseek │ │ │ │ ├── deepseek_r1_streaming.py │ │ │ │ ├── hf_deepseek_67b_base.py │ │ │ │ ├── hf_deepseek_67b_chat.py │ │ │ │ ├── hf_deepseek_7b_base.py │ │ │ │ ├── hf_deepseek_7b_chat.py │ │ │ │ ├── hf_deepseek_coder_1_3b_instruct.py │ │ │ │ ├── hf_deepseek_coder_33b_instruct.py │ │ │ │ ├── hf_deepseek_coder_6_7b_instruct.py │ │ │ │ ├── hf_deepseek_moe_16b_base.py │ │ │ │ ├── hf_deepseek_moe_16b_chat.py │ │ │ │ ├── hf_deepseek_r1_distill_llama_70b.py │ │ │ │ ├── hf_deepseek_r1_distill_llama_8b.py │ │ │ │ ├── hf_deepseek_r1_distill_qwen_14b.py │ │ │ │ ├── hf_deepseek_r1_distill_qwen_1_5b.py │ │ │ │ ├── hf_deepseek_r1_distill_qwen_32b.py │ │ │ │ ├── hf_deepseek_r1_distill_qwen_7b.py │ │ │ │ ├── hf_deepseek_v2.py │ │ │ │ ├── hf_deepseek_v2_chat.py │ │ │ │ ├── hf_deepseek_v2_lite.py │ │ │ │ ├── hf_deepseek_v2_lite_chat.py │ │ │ │ ├── lmdeploy_deepseek_67b_base.py │ │ │ │ ├── lmdeploy_deepseek_67b_chat.py │ │ │ │ ├── lmdeploy_deepseek_7b_base.py │ │ │ │ ├── lmdeploy_deepseek_7b_chat.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_llama_70b.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_llama_8b.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_qwen_14b.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_qwen_1_5b.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_qwen_32b.py │ │ │ │ ├── lmdeploy_deepseek_r1_distill_qwen_7b.py │ │ │ │ ├── lmdeploy_deepseek_series.py │ │ │ │ ├── lmdeploy_deepseek_v2.py │ │ │ │ ├── lmdeploy_deepseek_v2_5.py │ │ │ │ ├── lmdeploy_deepseek_v2_5_1210.py │ │ │ │ ├── lmdeploy_deepseek_v2_lite.py │ │ │ │ ├── vllm_deepseek_67b_chat.py │ │ │ │ ├── vllm_deepseek_7b_chat.py │ │ │ │ ├── vllm_deepseek_moe_16b_base.py │ │ │ │ └── vllm_deepseek_moe_16b_chat.py │ │ │ ├── falcon │ │ │ │ ├── hf_falcon_40b.py │ │ │ │ └── hf_falcon_7b.py │ │ │ ├── gemini │ │ │ │ ├── gemini_1_5_flash.py │ │ │ │ ├── gemini_1_5_pro.py │ │ │ │ └── gemini_pro.py │ │ │ ├── gemma │ │ │ │ ├── hf_gemma2_27b.py │ │ │ │ ├── hf_gemma2_27b_it.py │ │ │ │ ├── hf_gemma2_2b.py │ │ │ │ ├── hf_gemma2_2b_it.py │ │ │ │ ├── hf_gemma2_9b.py │ │ │ │ ├── hf_gemma2_9b_it.py │ │ │ │ ├── hf_gemma_2b.py │ │ │ │ ├── hf_gemma_2b_it.py │ │ │ │ ├── hf_gemma_7b.py │ │ │ │ ├── hf_gemma_7b_it.py │ │ │ │ ├── lmdeploy_gemma_27b.py │ │ │ │ ├── lmdeploy_gemma_27b_it.py │ │ │ │ ├── lmdeploy_gemma_9b.py │ │ │ │ ├── lmdeploy_gemma_9b_it.py │ │ │ │ ├── vllm_gemma_2b.py │ │ │ │ ├── vllm_gemma_2b_it.py │ │ │ │ ├── vllm_gemma_3_12b_it.py │ │ │ │ ├── vllm_gemma_3_27b_it.py │ │ │ │ ├── vllm_gemma_3_4b_it.py │ │ │ │ ├── vllm_gemma_7b.py │ │ │ │ └── vllm_gemma_7b_it.py │ │ │ ├── hf_internlm │ │ │ │ ├── README.md │ │ │ │ ├── hf_internlm2_1_8b.py │ │ │ │ ├── hf_internlm2_20b.py │ │ │ │ ├── hf_internlm2_5_1_8b_chat.py │ │ │ │ ├── hf_internlm2_5_20b_chat.py │ │ │ │ ├── hf_internlm2_5_7b.py │ │ │ │ ├── hf_internlm2_5_7b_chat.py │ │ │ │ ├── hf_internlm2_7b.py │ │ │ │ ├── hf_internlm2_base_20b.py │ │ │ │ ├── hf_internlm2_base_7b.py │ │ │ │ ├── hf_internlm2_chat_1_8b.py │ │ │ │ ├── hf_internlm2_chat_1_8b_sft.py │ │ │ │ ├── hf_internlm2_chat_20b.py │ │ │ │ ├── hf_internlm2_chat_20b_sft.py │ │ │ │ ├── hf_internlm2_chat_20b_with_system.py │ │ │ │ ├── hf_internlm2_chat_7b.py │ │ │ │ ├── hf_internlm2_chat_7b_sft.py │ │ │ │ ├── hf_internlm2_chat_7b_with_system.py │ │ │ │ ├── hf_internlm2_chat_math_20b.py │ │ │ │ ├── hf_internlm2_chat_math_20b_with_system.py │ │ │ │ ├── hf_internlm2_chat_math_7b.py │ │ │ │ ├── hf_internlm2_chat_math_7b_with_system.py │ │ │ │ ├── hf_internlm2_math_20b.py │ │ │ │ ├── hf_internlm2_math_7b.py │ │ │ │ ├── hf_internlm3_8b_instruct.py │ │ │ │ ├── hf_internlm_20b.py │ │ │ │ ├── hf_internlm_7b.py │ │ │ │ ├── hf_internlm_chat_20b.py │ │ │ │ ├── hf_internlm_chat_7b.py │ │ │ │ ├── lmdeploy_internlm2_1_8b.py │ │ │ │ ├── lmdeploy_internlm2_20b.py │ │ │ │ ├── lmdeploy_internlm2_5_1_8b_chat.py │ │ │ │ ├── lmdeploy_internlm2_5_20b_chat.py │ │ │ │ ├── lmdeploy_internlm2_5_7b.py │ │ │ │ ├── lmdeploy_internlm2_5_7b_chat.py │ │ │ │ ├── lmdeploy_internlm2_5_7b_chat_1m.py │ │ │ │ ├── lmdeploy_internlm2_7b.py │ │ │ │ ├── lmdeploy_internlm2_base_20b.py │ │ │ │ ├── lmdeploy_internlm2_base_7b.py │ │ │ │ ├── lmdeploy_internlm2_chat_1_8b.py │ │ │ │ ├── lmdeploy_internlm2_chat_1_8b_sft.py │ │ │ │ ├── lmdeploy_internlm2_chat_20b.py │ │ │ │ ├── lmdeploy_internlm2_chat_20b_sft.py │ │ │ │ ├── lmdeploy_internlm2_chat_7b.py │ │ │ │ ├── lmdeploy_internlm2_chat_7b_sft.py │ │ │ │ ├── lmdeploy_internlm2_series.py │ │ │ │ ├── lmdeploy_internlm3_8b_instruct.py │ │ │ │ ├── lmdeploy_internlm3_8b_instruct_128k.py │ │ │ │ ├── lmdeploy_internlm_20b.py │ │ │ │ ├── lmdeploy_internlm_7b.py │ │ │ │ ├── lmdeploy_internlm_chat_20b.py │ │ │ │ ├── lmdeploy_internlm_chat_7b.py │ │ │ │ ├── lmdeploy_oreal_32b.py │ │ │ │ ├── vllm_internlm2_chat_1_8b.py │ │ │ │ ├── vllm_internlm2_chat_1_8b_sft.py │ │ │ │ ├── vllm_internlm2_chat_20b.py │ │ │ │ ├── vllm_internlm2_chat_20b_sft.py │ │ │ │ ├── vllm_internlm2_chat_7b.py │ │ │ │ ├── vllm_internlm2_chat_7b_sft.py │ │ │ │ └── vllm_internlm2_series.py │ │ │ ├── hf_llama │ │ │ │ ├── hf_llama2_13b.py │ │ │ │ ├── hf_llama2_13b_chat.py │ │ │ │ ├── hf_llama2_70b.py │ │ │ │ ├── hf_llama2_70b_chat.py │ │ │ │ ├── hf_llama2_7b.py │ │ │ │ ├── hf_llama2_7b_chat.py │ │ │ │ ├── hf_llama3_1_70b_instruct.py │ │ │ │ ├── hf_llama3_1_8b.py │ │ │ │ ├── hf_llama3_1_8b_instruct.py │ │ │ │ ├── hf_llama3_2_3b_instruct.py │ │ │ │ ├── hf_llama3_70b.py │ │ │ │ ├── hf_llama3_70b_instruct.py │ │ │ │ ├── hf_llama3_8b.py │ │ │ │ ├── hf_llama3_8b_instruct.py │ │ │ │ ├── hf_llama_13b.py │ │ │ │ ├── hf_llama_30b.py │ │ │ │ ├── hf_llama_65b.py │ │ │ │ ├── hf_llama_7b.py │ │ │ │ ├── lmdeploy_llama2_13b.py │ │ │ │ ├── lmdeploy_llama2_13b_chat.py │ │ │ │ ├── lmdeploy_llama2_70b.py │ │ │ │ ├── lmdeploy_llama2_70b_chat.py │ │ │ │ ├── lmdeploy_llama2_7b.py │ │ │ │ ├── lmdeploy_llama2_7b_chat.py │ │ │ │ ├── lmdeploy_llama3_1_70b_instruct.py │ │ │ │ ├── lmdeploy_llama3_1_8b.py │ │ │ │ ├── lmdeploy_llama3_1_8b_instruct.py │ │ │ │ ├── lmdeploy_llama3_2_3b_instruct.py │ │ │ │ ├── lmdeploy_llama3_3_70b_instruct.py │ │ │ │ ├── lmdeploy_llama3_70b.py │ │ │ │ ├── lmdeploy_llama3_70b_instruct.py │ │ │ │ ├── lmdeploy_llama3_8b.py │ │ │ │ ├── lmdeploy_llama3_8b_instruct.py │ │ │ │ ├── lmdeploy_llama_13b.py │ │ │ │ ├── lmdeploy_llama_30b.py │ │ │ │ ├── lmdeploy_llama_65b.py │ │ │ │ ├── lmdeploy_llama_7b.py │ │ │ │ └── vllm_llama_series.py │ │ │ ├── huatuogpt │ │ │ │ ├── hf_huatuogpt2_13b.py │ │ │ │ ├── hf_huatuogpt2_7b.py │ │ │ │ ├── hf_huatuogpt_o1_7b.py │ │ │ │ └── hf_huatuogpt_o1_8b.py │ │ │ ├── internlm │ │ │ │ └── internlm_7b.py │ │ │ ├── interns1 │ │ │ │ └── intern_s1.py │ │ │ ├── internvl │ │ │ │ ├── lmdeploy_internvl_2_5_38b.py │ │ │ │ └── lmdeploy_internvl_2_5_8b.py │ │ │ ├── judge_llm │ │ │ │ ├── auto_j │ │ │ │ │ ├── hf_autoj_bilingual_6b.py │ │ │ │ │ ├── hf_autoj_eng_13b.py │ │ │ │ │ ├── hf_autoj_eng_13b_4bit.py │ │ │ │ │ └── hf_autoj_scen_classifier.py │ │ │ │ ├── judgelm │ │ │ │ │ ├── hf_judgelm_13b_v1.py │ │ │ │ │ ├── hf_judgelm_33b_v1.py │ │ │ │ │ └── hf_judgelm_7b_v1.py │ │ │ │ └── pandalm │ │ │ │ │ ├── hf_alpaca_pandalm_7b_v1.py │ │ │ │ │ └── hf_pandalm_7b_v1.py │ │ │ ├── lemur │ │ │ │ └── lemur_70b_chat.py │ │ │ ├── lingowhale │ │ │ │ └── hf_lingowhale_8b.py │ │ │ ├── mistral │ │ │ │ ├── hf_ministral_8b_instruct_2410.py │ │ │ │ ├── hf_mistral_7b_instruct_v0_1.py │ │ │ │ ├── hf_mistral_7b_instruct_v0_2.py │ │ │ │ ├── hf_mistral_7b_instruct_v0_3.py │ │ │ │ ├── hf_mistral_7b_v0_1.py │ │ │ │ ├── hf_mistral_7b_v0_2.py │ │ │ │ ├── hf_mistral_7b_v0_3.py │ │ │ │ ├── hf_mistral_nemo_instruct_2407.py │ │ │ │ ├── hf_mistral_small_instruct_2409.py │ │ │ │ ├── hf_mixtral_8x22b_instruct_v0_1.py │ │ │ │ ├── hf_mixtral_8x22b_v0_1.py │ │ │ │ ├── hf_mixtral_8x7b_instruct_v0_1.py │ │ │ │ ├── hf_mixtral_8x7b_v0_1.py │ │ │ │ ├── lmdeploy_ministral_8b_instruct_2410.py │ │ │ │ ├── lmdeploy_mistral_7b_instruct_v0_3.py │ │ │ │ ├── lmdeploy_mistral_large_instruct_2411.py │ │ │ │ ├── lmdeploy_mistral_nemo_instruct_2407.py │ │ │ │ ├── lmdeploy_mistral_small_instruct_2409.py │ │ │ │ ├── lmdeploy_mixtral_8x22b_instruct_v0_1.py │ │ │ │ ├── lmdeploy_mixtral_large_instruct_2407.py │ │ │ │ ├── mixtral_8x7b_32k.py │ │ │ │ ├── vllm_mistral_7b_instruct_v0_1.py │ │ │ │ ├── vllm_mistral_7b_instruct_v0_2.py │ │ │ │ ├── vllm_mistral_7b_v0_1.py │ │ │ │ ├── vllm_mistral_7b_v0_2.py │ │ │ │ ├── vllm_mixtral_8x22b_instruct_v0_1.py │ │ │ │ ├── vllm_mixtral_8x22b_v0_1.py │ │ │ │ ├── vllm_mixtral_8x7b_instruct_v0_1.py │ │ │ │ ├── vllm_mixtral_8x7b_v0_1.py │ │ │ │ └── vllm_mixtral_large_instruct_2407.py │ │ │ ├── moonshot │ │ │ │ ├── kimi_k2.py │ │ │ │ └── kimi_k2_streaming.py │ │ │ ├── moss │ │ │ │ ├── hf_moss_moon_003_base.py │ │ │ │ └── hf_moss_moon_003_sft.py │ │ │ ├── mpt │ │ │ │ ├── hf_mpt_7b.py │ │ │ │ └── hf_mpt_instruct_7b.py │ │ │ ├── ms_internlm │ │ │ │ └── ms_internlm_chat_7b_8k.py │ │ │ ├── nanbeige │ │ │ │ ├── hf_nanbeige2_16b_chat.py │ │ │ │ ├── hf_nanbeige2_8b_chat.py │ │ │ │ └── hf_nanbeige_16b_chat.py │ │ │ ├── nvidia │ │ │ │ └── lmdeploy_nemotron_70b_instruct_hf.py │ │ │ ├── openai │ │ │ │ ├── gpt_3_5_turbo.py │ │ │ │ ├── gpt_3_5_turbo_0125.py │ │ │ │ ├── gpt_4.py │ │ │ │ ├── gpt_4o_2024_05_13.py │ │ │ │ ├── o1_mini_2024_09_12.py │ │ │ │ └── o1_preview_2024_09_12.py │ │ │ ├── openbmb │ │ │ │ ├── hf_minicpm3_4b.py │ │ │ │ ├── hf_minicpm_2b_dpo_fp32.py │ │ │ │ ├── hf_minicpm_2b_sft_bf16.py │ │ │ │ └── hf_minicpm_2b_sft_fp32.py │ │ │ ├── opt │ │ │ │ ├── hf_opt_125m.py │ │ │ │ └── hf_opt_350m.py │ │ │ ├── others │ │ │ │ ├── hf_abel_7b_001.py │ │ │ │ ├── hf_abel_7b_002.py │ │ │ │ ├── hf_arithmo_mistral_7b.py │ │ │ │ ├── hf_command_r_plus.py │ │ │ │ ├── hf_dbrx_base.py │ │ │ │ ├── hf_dbrx_instruct.py │ │ │ │ ├── hf_dolphin_21_mistral_7b.py │ │ │ │ ├── hf_fashiongpt_70b_v11.py │ │ │ │ ├── hf_gsm8k_rft_llama7b2_u13b.py │ │ │ │ ├── hf_metamath_7b_v1_0.py │ │ │ │ ├── hf_metamath_llemma_7b.py │ │ │ │ ├── hf_metamath_mistral_7b.py │ │ │ │ ├── hf_openchat_35_0106.py │ │ │ │ ├── hf_openchat_35_1210.py │ │ │ │ ├── hf_orionstar_14b_base.py │ │ │ │ ├── hf_orionstar_yi_34b_chat.py │ │ │ │ ├── hf_phi_2.py │ │ │ │ ├── hf_telechat_12b_v2.py │ │ │ │ ├── hf_telechat_52b.py │ │ │ │ ├── hf_telechat_7b.py │ │ │ │ ├── hf_yayi2_30b_base.py │ │ │ │ ├── vllm_dbrx_instruct.py │ │ │ │ └── vllm_orionstar_14b_longchat.py │ │ │ ├── phi │ │ │ │ ├── hf_phi_3_5_MoE_instruct.py │ │ │ │ ├── hf_phi_3_5_mini_instruct.py │ │ │ │ ├── hf_phi_3_medium_4k_instruct.py │ │ │ │ ├── hf_phi_3_mini_4k_instruct.py │ │ │ │ ├── hf_phi_3_small_8k_instruct.py │ │ │ │ └── hf_phi_4.py │ │ │ ├── pulse │ │ │ │ └── hf_pulse_7b.py │ │ │ ├── qwen │ │ │ │ ├── README.md │ │ │ │ ├── hf_qwen1_5_0_5b.py │ │ │ │ ├── hf_qwen1_5_0_5b_chat.py │ │ │ │ ├── hf_qwen1_5_110b.py │ │ │ │ ├── hf_qwen1_5_110b_chat.py │ │ │ │ ├── hf_qwen1_5_14b.py │ │ │ │ ├── hf_qwen1_5_14b_chat.py │ │ │ │ ├── hf_qwen1_5_1_8b.py │ │ │ │ ├── hf_qwen1_5_1_8b_chat.py │ │ │ │ ├── hf_qwen1_5_32b.py │ │ │ │ ├── hf_qwen1_5_32b_chat.py │ │ │ │ ├── hf_qwen1_5_4b.py │ │ │ │ ├── hf_qwen1_5_4b_chat.py │ │ │ │ ├── hf_qwen1_5_72b.py │ │ │ │ ├── hf_qwen1_5_72b_chat.py │ │ │ │ ├── hf_qwen1_5_7b.py │ │ │ │ ├── hf_qwen1_5_7b_chat.py │ │ │ │ ├── hf_qwen1_5_moe_a2_7b.py │ │ │ │ ├── hf_qwen1_5_moe_a2_7b_chat.py │ │ │ │ ├── hf_qwen2_0_5b.py │ │ │ │ ├── hf_qwen2_0_5b_instruct.py │ │ │ │ ├── hf_qwen2_1_5b.py │ │ │ │ ├── hf_qwen2_1_5b_instruct.py │ │ │ │ ├── hf_qwen2_57b_a14b.py │ │ │ │ ├── hf_qwen2_72b.py │ │ │ │ ├── hf_qwen2_7b.py │ │ │ │ ├── hf_qwen2_7b_instruct.py │ │ │ │ ├── hf_qwen_14b.py │ │ │ │ ├── hf_qwen_14b_chat.py │ │ │ │ ├── hf_qwen_1_8b.py │ │ │ │ ├── hf_qwen_1_8b_chat.py │ │ │ │ ├── hf_qwen_72b.py │ │ │ │ ├── hf_qwen_72b_chat.py │ │ │ │ ├── hf_qwen_7b.py │ │ │ │ ├── hf_qwen_7b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_110b.py │ │ │ │ ├── lmdeploy_qwen1_5_110b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_14b.py │ │ │ │ ├── lmdeploy_qwen1_5_14b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_1_8b.py │ │ │ │ ├── lmdeploy_qwen1_5_1_8b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_32b.py │ │ │ │ ├── lmdeploy_qwen1_5_32b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_4b.py │ │ │ │ ├── lmdeploy_qwen1_5_4b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_72b.py │ │ │ │ ├── lmdeploy_qwen1_5_72b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_7b.py │ │ │ │ ├── lmdeploy_qwen1_5_7b_chat.py │ │ │ │ ├── lmdeploy_qwen1_5_series.py │ │ │ │ ├── lmdeploy_qwen2_1_5b.py │ │ │ │ ├── lmdeploy_qwen2_1_5b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_72b.py │ │ │ │ ├── lmdeploy_qwen2_72b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_7b.py │ │ │ │ ├── lmdeploy_qwen2_7b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_series.py │ │ │ │ ├── lmdeploy_qwen_14b.py │ │ │ │ ├── lmdeploy_qwen_14b_chat.py │ │ │ │ ├── lmdeploy_qwen_1_8b.py │ │ │ │ ├── lmdeploy_qwen_1_8b_chat.py │ │ │ │ ├── lmdeploy_qwen_72b.py │ │ │ │ ├── lmdeploy_qwen_72b_chat.py │ │ │ │ ├── lmdeploy_qwen_7b.py │ │ │ │ ├── lmdeploy_qwen_7b_chat.py │ │ │ │ ├── lmdeploy_qwen_series.py │ │ │ │ ├── ms_qwen_7b_chat.py │ │ │ │ ├── vllm_qwen1_5_0_5b.py │ │ │ │ ├── vllm_qwen1_5_0_5b_chat.py │ │ │ │ ├── vllm_qwen1_5_110b.py │ │ │ │ ├── vllm_qwen1_5_110b_chat.py │ │ │ │ ├── vllm_qwen1_5_14b.py │ │ │ │ ├── vllm_qwen1_5_14b_chat.py │ │ │ │ ├── vllm_qwen1_5_1_8b.py │ │ │ │ ├── vllm_qwen1_5_1_8b_chat.py │ │ │ │ ├── vllm_qwen1_5_32b.py │ │ │ │ ├── vllm_qwen1_5_32b_chat.py │ │ │ │ ├── vllm_qwen1_5_4b.py │ │ │ │ ├── vllm_qwen1_5_4b_chat.py │ │ │ │ ├── vllm_qwen1_5_72b.py │ │ │ │ ├── vllm_qwen1_5_72b_chat.py │ │ │ │ ├── vllm_qwen1_5_7b.py │ │ │ │ ├── vllm_qwen1_5_7b_chat.py │ │ │ │ ├── vllm_qwen1_5_moe_a2_7b.py │ │ │ │ ├── vllm_qwen1_5_moe_a2_7b_chat.py │ │ │ │ ├── vllm_qwen1_5_series.py │ │ │ │ ├── vllm_qwen2_0_5b.py │ │ │ │ ├── vllm_qwen2_0_5b_instruct.py │ │ │ │ ├── vllm_qwen2_1_5b.py │ │ │ │ ├── vllm_qwen2_1_5b_instruct.py │ │ │ │ ├── vllm_qwen2_57b_a14b_instruct.py │ │ │ │ ├── vllm_qwen2_72b.py │ │ │ │ ├── vllm_qwen2_72b_instruct.py │ │ │ │ ├── vllm_qwen2_7b.py │ │ │ │ ├── vllm_qwen2_7b_instruct.py │ │ │ │ ├── vllm_qwen2_series.py │ │ │ │ ├── vllm_qwen_14b.py │ │ │ │ ├── vllm_qwen_14b_chat.py │ │ │ │ ├── vllm_qwen_1_8b.py │ │ │ │ ├── vllm_qwen_1_8b_chat.py │ │ │ │ ├── vllm_qwen_72b.py │ │ │ │ ├── vllm_qwen_72b_chat.py │ │ │ │ ├── vllm_qwen_7b.py │ │ │ │ ├── vllm_qwen_7b_chat.py │ │ │ │ └── vllm_qwen_series.py │ │ │ ├── qwen2_5 │ │ │ │ ├── hf_qwen2_5_0_5b_instruct.py │ │ │ │ ├── hf_qwen2_5_14b_instruct.py │ │ │ │ ├── hf_qwen2_5_1_5b_instruct.py │ │ │ │ ├── hf_qwen2_5_32b_instruct.py │ │ │ │ ├── hf_qwen2_5_3b_instruct.py │ │ │ │ ├── hf_qwen2_5_72b_instruct.py │ │ │ │ ├── hf_qwen2_5_7b_instruct.py │ │ │ │ ├── hf_qwen_2_5_14b.py │ │ │ │ ├── hf_qwen_2_5_32b.py │ │ │ │ ├── hf_qwen_2_5_7b.py │ │ │ │ ├── lmdeploy_qwen2_5_0_5b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_14b.py │ │ │ │ ├── lmdeploy_qwen2_5_14b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_1_5b.py │ │ │ │ ├── lmdeploy_qwen2_5_1_5b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_32b.py │ │ │ │ ├── lmdeploy_qwen2_5_32b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_3b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_72b.py │ │ │ │ ├── lmdeploy_qwen2_5_72b_instruct.py │ │ │ │ ├── lmdeploy_qwen2_5_7b.py │ │ │ │ ├── lmdeploy_qwen2_5_7b_instruct.py │ │ │ │ ├── vllm_qwen2_5_0_5b_instruct.py │ │ │ │ ├── vllm_qwen2_5_14b_instruct.py │ │ │ │ ├── vllm_qwen2_5_14b_instruct_128k.py │ │ │ │ ├── vllm_qwen2_5_1_5b_instruct.py │ │ │ │ ├── vllm_qwen2_5_32b_instruct.py │ │ │ │ ├── vllm_qwen2_5_32b_instruct_128k.py │ │ │ │ ├── vllm_qwen2_5_3b_instruct.py │ │ │ │ ├── vllm_qwen2_5_72b_instruct.py │ │ │ │ ├── vllm_qwen2_5_72b_instruct_128k.py │ │ │ │ ├── vllm_qwen2_5_7b_instruct.py │ │ │ │ └── vllm_qwen2_5_7b_instruct_128k.py │ │ │ ├── qwen3 │ │ │ │ └── lmdeploy_qwen3_0_6b.py │ │ │ ├── qwq │ │ │ │ ├── lmdeploy_qwq_32b.py │ │ │ │ └── lmdeploy_qwq_32b_preview.py │ │ │ ├── rwkv │ │ │ │ └── rwkv5_3b.py │ │ │ ├── skywork │ │ │ │ ├── hf_skywork_13b.py │ │ │ │ └── lmdeploy_skywork_o1_open_llama3_1_8b_instruct.py │ │ │ ├── tigerbot │ │ │ │ ├── hf_tigerbot_13b_base_v1.py │ │ │ │ ├── hf_tigerbot_13b_base_v2.py │ │ │ │ ├── hf_tigerbot_13b_chat_v1.py │ │ │ │ ├── hf_tigerbot_13b_chat_v2.py │ │ │ │ ├── hf_tigerbot_70b_base.py │ │ │ │ ├── hf_tigerbot_70b_chat_v2.py │ │ │ │ ├── hf_tigerbot_70b_chat_v3.py │ │ │ │ ├── hf_tigerbot_7b_base.py │ │ │ │ ├── hf_tigerbot_7b_base_v3.py │ │ │ │ ├── hf_tigerbot_7b_chat_v3.py │ │ │ │ └── hf_tigerbot_7b_sft.py │ │ │ ├── vicuna │ │ │ │ ├── hf_vicuna_13b_v13.py │ │ │ │ ├── hf_vicuna_13b_v15.py │ │ │ │ ├── hf_vicuna_13b_v15_16k.py │ │ │ │ ├── hf_vicuna_33b_v13.py │ │ │ │ ├── hf_vicuna_7b_v13.py │ │ │ │ ├── hf_vicuna_7b_v15.py │ │ │ │ ├── hf_vicuna_7b_v15_16k.py │ │ │ │ ├── vllm_vicuna_13b_v15_16k.py │ │ │ │ └── vllm_vicuna_7b_v15_16k.py │ │ │ ├── wizardcoder │ │ │ │ ├── hf_wizardcoder_15b.py │ │ │ │ ├── hf_wizardcoder_1b.py │ │ │ │ ├── hf_wizardcoder_3b.py │ │ │ │ ├── hf_wizardcoder_python_13b.py │ │ │ │ └── hf_wizardcoder_python_34b.py │ │ │ ├── wizardlm │ │ │ │ ├── hf_wizardlm_13b_v1_2.py │ │ │ │ ├── hf_wizardlm_70b_v1_0.py │ │ │ │ ├── hf_wizardlm_7b_v1_0.py │ │ │ │ ├── hf_wizardmath_7b_v1_0.py │ │ │ │ ├── hf_wizardmath_7b_v1_1.py │ │ │ │ ├── vllm_wizardlm_13b_v1_2.py │ │ │ │ ├── vllm_wizardlm_70b_v1_0.py │ │ │ │ └── vllm_wizardlm_7b_v1_0.py │ │ │ ├── yi │ │ │ │ ├── hf_yi_1_5_34b.py │ │ │ │ ├── hf_yi_1_5_34b_chat.py │ │ │ │ ├── hf_yi_1_5_6b.py │ │ │ │ ├── hf_yi_1_5_6b_chat.py │ │ │ │ ├── hf_yi_1_5_9b.py │ │ │ │ ├── hf_yi_1_5_9b_chat.py │ │ │ │ ├── hf_yi_34b.py │ │ │ │ ├── hf_yi_34b_chat.py │ │ │ │ ├── hf_yi_6b.py │ │ │ │ ├── hf_yi_6b_chat.py │ │ │ │ ├── lmdeploy_yi_1_5_34b_chat.py │ │ │ │ ├── lmdeploy_yi_1_5_6b_chat.py │ │ │ │ ├── lmdeploy_yi_1_5_9b.py │ │ │ │ ├── lmdeploy_yi_1_5_9b_chat.py │ │ │ │ ├── lmdeploy_yi_34b_chat.py │ │ │ │ ├── lmdeploy_yi_6b_chat.py │ │ │ │ └── lmdeploy_yi_series.py │ │ │ └── zephyr │ │ │ │ ├── hf_zephyr_7b_beta.py │ │ │ │ └── vllm_zephyr_7b_beta.py │ │ └── summarizers │ │ │ ├── OlympiadBench.py │ │ │ ├── PMMEval.py │ │ │ ├── agent_bench.py │ │ │ ├── charm_reason.py │ │ │ ├── chat_OC15.py │ │ │ ├── chat_OC15_multi_faceted.py │ │ │ ├── cibench.py │ │ │ ├── code_passk.py │ │ │ ├── compassbench_v1_1_objective.py │ │ │ ├── compassbench_v1_1_objective_public.py │ │ │ ├── compassbench_v1_3_objective.py │ │ │ ├── compassbench_v1_objective.py │ │ │ ├── contamination.py │ │ │ ├── example.py │ │ │ ├── groups │ │ │ ├── GaokaoBench.py │ │ │ ├── MMLUArabic.py │ │ │ ├── OlympiadBench.py │ │ │ ├── PHYSICS.py │ │ │ ├── PMMEval.py │ │ │ ├── agieval.py │ │ │ ├── babilong.py │ │ │ ├── bbeh.py │ │ │ ├── bbh.py │ │ │ ├── calm.py │ │ │ ├── ceval.py │ │ │ ├── charm_reason.py │ │ │ ├── cibench.py │ │ │ ├── cmmlu.py │ │ │ ├── ds1000.py │ │ │ ├── flores.py │ │ │ ├── humanevalx.py │ │ │ ├── infinitebench.py │ │ │ ├── jigsaw_multilingual.py │ │ │ ├── korbench.py │ │ │ ├── lawbench.py │ │ │ ├── lcbench.py │ │ │ ├── legacy │ │ │ │ └── cibench.py │ │ │ ├── leval.py │ │ │ ├── longbench.py │ │ │ ├── lveval.py │ │ │ ├── mathbench.py │ │ │ ├── mathbench_2024.py │ │ │ ├── mathbench_agent.py │ │ │ ├── mathbench_v1.py │ │ │ ├── mathbench_v1_2024.py │ │ │ ├── mathbench_v1_2024_lang.py │ │ │ ├── mgsm.py │ │ │ ├── mmlu.py │ │ │ ├── mmlu_cf.py │ │ │ ├── mmlu_pro.py │ │ │ ├── mmmlu.py │ │ │ ├── multipl_e.py │ │ │ ├── musr_average.py │ │ │ ├── plugineval.py │ │ │ ├── ruler.py │ │ │ ├── scibench.py │ │ │ ├── scicode.py │ │ │ ├── supergpqa.py │ │ │ ├── teval.py │ │ │ ├── tydiqa.py │ │ │ └── xiezhi.py │ │ │ ├── infinitebench.py │ │ │ ├── internlm2_keyset.py │ │ │ ├── judgedataset_all.py │ │ │ ├── judgerbenchv2.py │ │ │ ├── lawbench.py │ │ │ ├── leaderboard.py │ │ │ ├── leval.py │ │ │ ├── longbench.py │ │ │ ├── longeval_v2.py │ │ │ ├── lveval.py │ │ │ ├── math_agent.py │ │ │ ├── math_baseline.py │ │ │ ├── mathbench.py │ │ │ ├── mathbench_v1.py │ │ │ ├── medium.py │ │ │ ├── mmlu_cf.py │ │ │ ├── mmlu_pro.py │ │ │ ├── mmmlu.py │ │ │ ├── mmmlu_lite.py │ │ │ ├── needlebench.py │ │ │ ├── plugineval.py │ │ │ ├── rewardbench.py │ │ │ ├── ruler.py │ │ │ ├── scicode.py │ │ │ ├── simpleqa.py │ │ │ ├── small.py │ │ │ ├── subjective.py │ │ │ ├── teval.py │ │ │ └── tiny.py │ ├── datasets │ │ ├── CARDBiomedBench.py │ │ ├── ClinicBench.py │ │ ├── Earth_Silver.py │ │ ├── FinanceIQ.py │ │ ├── GaokaoBench.py │ │ ├── IFEval │ │ │ ├── __init__.py │ │ │ ├── evaluation_main.py │ │ │ ├── ifeval.py │ │ │ ├── instructions.py │ │ │ ├── instructions_registry.py │ │ │ └── instructions_util.py │ │ ├── LCBench.py │ │ ├── MMLUArabic.py │ │ ├── MedCalc_Bench.py │ │ ├── MedQA.py │ │ ├── MedXpertQA.py │ │ ├── Medbullets.py │ │ ├── NPHardEval │ │ │ ├── __init__.py │ │ │ ├── cmp_GCP_D.py │ │ │ ├── cmp_KSP.py │ │ │ ├── cmp_TSP_D.py │ │ │ ├── hard_GCP.py │ │ │ ├── hard_MSP.py │ │ │ ├── hard_TSP.py │ │ │ ├── p_BSP.py │ │ │ ├── p_EDP.py │ │ │ ├── p_SPP.py │ │ │ ├── prompts.py │ │ │ └── utils.py │ │ ├── OlympiadBench.py │ │ ├── OpenFinData.py │ │ ├── PMMEval │ │ │ ├── __init__.py │ │ │ ├── flores.py │ │ │ ├── humanevalxl.py │ │ │ ├── mgsm.py │ │ │ ├── mhellaswag.py │ │ │ ├── mifeval.py │ │ │ ├── mifeval_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── combination_checker.py │ │ │ │ ├── detectable_content_checker.py │ │ │ │ ├── detectable_format_checker.py │ │ │ │ ├── keywords_checker.py │ │ │ │ ├── length_constraints_checker.py │ │ │ │ ├── punctuation_checker.py │ │ │ │ └── startend_checker.py │ │ │ ├── mlogiqa.py │ │ │ ├── mmmlu.py │ │ │ └── xnli.py │ │ ├── ProteinLMBench.py │ │ ├── PubMedQA.py │ │ ├── QuALITY.py │ │ ├── SciEval.py │ │ ├── SciKnowEval.py │ │ ├── ScienceQA.py │ │ ├── SeedBench.py │ │ ├── TheoremQA │ │ │ ├── __init__.py │ │ │ ├── legacy.py │ │ │ ├── main.py │ │ │ ├── number_utils.py │ │ │ └── utils.py │ │ ├── __init__.py │ │ ├── advglue.py │ │ ├── afqmcd.py │ │ ├── agieval │ │ │ ├── __init__.py │ │ │ ├── agieval.py │ │ │ ├── constructions.py │ │ │ ├── dataset_loader.py │ │ │ ├── evaluation.py │ │ │ ├── math_equivalence.py │ │ │ ├── post_process.py │ │ │ └── utils.py │ │ ├── aime2024.py │ │ ├── anli.py │ │ ├── anthropics_evals.py │ │ ├── apps.py │ │ ├── arc.py │ │ ├── arc_prize_public_evaluation.py │ │ ├── ax.py │ │ ├── babilong │ │ │ ├── __init__.py │ │ │ ├── babilong.py │ │ │ ├── babilong_utils.py │ │ │ └── prompts.py │ │ ├── base.py │ │ ├── bbeh.py │ │ ├── bbh.py │ │ ├── benbench.py │ │ ├── bigcodebench │ │ │ ├── __init__.py │ │ │ ├── bigcodebench.py │ │ │ └── extractor.py │ │ ├── boolq.py │ │ ├── bustum.py │ │ ├── c3.py │ │ ├── calm │ │ │ ├── __init__.py │ │ │ ├── calm.py │ │ │ ├── data_processing │ │ │ │ ├── __init__.py │ │ │ │ ├── generate_questions.py │ │ │ │ ├── prompt │ │ │ │ │ ├── AC-B_causal_judgement.py │ │ │ │ │ ├── AR-B_CaLM-AR.py │ │ │ │ │ ├── ATE.py │ │ │ │ │ ├── BAS-B_backadj.py │ │ │ │ │ ├── BAS-C_max-BAS.py │ │ │ │ │ ├── BAS-C_min-BAS.py │ │ │ │ │ ├── BAS-C_mix-BAS.py │ │ │ │ │ ├── CA-B_FA.py │ │ │ │ │ ├── CA-B_FP.py │ │ │ │ │ ├── CB-B_collider-bias.py │ │ │ │ │ ├── CDE.py │ │ │ │ │ ├── CEG-O_E-CARE.py │ │ │ │ │ ├── CEI-B.py │ │ │ │ │ ├── CORR-B_correlation.py │ │ │ │ │ ├── CR-B_det-counterfactual.py │ │ │ │ │ ├── CR-C_CRASS.py │ │ │ │ │ ├── EAE-B_exp-away.py │ │ │ │ │ ├── ECI-B_CTB.py │ │ │ │ │ ├── ECI-B_ESC.py │ │ │ │ │ ├── ECI-B_MAVEN-ERE.py │ │ │ │ │ ├── ETT.py │ │ │ │ │ ├── FAS-C_FAS.py │ │ │ │ │ ├── IV-C_CaLM-IV.py │ │ │ │ │ ├── NDE.py │ │ │ │ │ ├── NIE.py │ │ │ │ │ ├── PCD-B_COPA.py │ │ │ │ │ ├── PCD-B_E-CARE.py │ │ │ │ │ ├── PCD-C_COPA.py │ │ │ │ │ ├── PCD-C_E-CARE.py │ │ │ │ │ ├── PN.py │ │ │ │ │ └── PS.py │ │ │ │ └── task_hiearchy.py │ │ │ ├── evaluation │ │ │ │ ├── __init__.py │ │ │ │ ├── accuracy │ │ │ │ │ ├── choice.py │ │ │ │ │ ├── open-ended.py │ │ │ │ │ └── prob.py │ │ │ │ ├── core_metrics.py │ │ │ │ ├── error │ │ │ │ │ └── basic_adversarial │ │ │ │ │ │ ├── AC-B_causal_judgement.py │ │ │ │ │ │ ├── AR-B_CaLM-AR.py │ │ │ │ │ │ ├── AS.py │ │ │ │ │ │ ├── CA-B.py │ │ │ │ │ │ ├── CEI-B.py │ │ │ │ │ │ ├── CLADDER.py │ │ │ │ │ │ ├── CR-C_CRASS.py │ │ │ │ │ │ ├── ECI.py │ │ │ │ │ │ ├── Natural.py │ │ │ │ │ │ ├── PCD-B.py │ │ │ │ │ │ ├── PCD-C.py │ │ │ │ │ │ └── Probability.py │ │ │ │ ├── errors.py │ │ │ │ └── labeling │ │ │ │ │ ├── AC-B_causal_judgement.py │ │ │ │ │ ├── AR-B_CaLM-AR.py │ │ │ │ │ ├── AS.py │ │ │ │ │ ├── CA-B_FA.py │ │ │ │ │ ├── CA-B_FP.py │ │ │ │ │ ├── CEG-O_E-CARE.py │ │ │ │ │ ├── CEI-B.py │ │ │ │ │ ├── CLADDER.py │ │ │ │ │ ├── CR-C_CRASS.py │ │ │ │ │ ├── ECI.py │ │ │ │ │ ├── Natural.py │ │ │ │ │ ├── PCD-B.py │ │ │ │ │ ├── PCD-C.py │ │ │ │ │ ├── Probability.py │ │ │ │ │ └── common_answers.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── load_items.py │ │ ├── cb.py │ │ ├── ceval.py │ │ ├── charm.py │ │ ├── chem_exam.py │ │ ├── chembench.py │ │ ├── chid.py │ │ ├── chinese_simpleqa.py │ │ ├── cibench.py │ │ ├── circular.py │ │ ├── civilcomments.py │ │ ├── climaqa.py │ │ ├── clozeTest_maxmin.py │ │ ├── cluewsc.py │ │ ├── cmb.py │ │ ├── cmmlu.py │ │ ├── cmnli.py │ │ ├── cmo_fib.py │ │ ├── cmrc.py │ │ ├── codecompass │ │ │ ├── CodeCompass.py │ │ │ ├── __init__.py │ │ │ ├── codecompass_runner.py │ │ │ ├── evaluator.py │ │ │ ├── executor.py │ │ │ ├── metrics.py │ │ │ └── utils.py │ │ ├── commonsenseqa.py │ │ ├── commonsenseqa_cn.py │ │ ├── compassbench_obj.py │ │ ├── copa.py │ │ ├── crowspairs.py │ │ ├── crowspairs_cn.py │ │ ├── csl.py │ │ ├── custom.py │ │ ├── cvalues.py │ │ ├── dingo.py │ │ ├── drcd.py │ │ ├── drop.py │ │ ├── drop_simple_eval.py │ │ ├── ds1000.py │ │ ├── ds1000_interpreter.py │ │ ├── eese │ │ │ ├── eese.py │ │ │ ├── eese_postprocessors.py │ │ │ └── utils.py │ │ ├── eprstmt.py │ │ ├── flores.py │ │ ├── game24.py │ │ ├── gaokao_math.py │ │ ├── generic.py │ │ ├── govrepcrs.py │ │ ├── gpqa.py │ │ ├── gsm8k.py │ │ ├── gsm_hard.py │ │ ├── healthbench │ │ │ ├── healthbench.py │ │ │ ├── sampler │ │ │ │ └── chat_completion_sampler.py │ │ │ └── types.py │ │ ├── hellaswag.py │ │ ├── hle.py │ │ ├── huggingface.py │ │ ├── humaneval.py │ │ ├── humaneval_multi.py │ │ ├── humaneval_pro.py │ │ ├── humanevalx.py │ │ ├── hungarian_math.py │ │ ├── inference_ppl.py │ │ ├── infinitebench │ │ │ ├── __init__.py │ │ │ ├── infinitebench_codedebug.py │ │ │ ├── infinitebench_coderun.py │ │ │ ├── infinitebench_endia.py │ │ │ ├── infinitebench_enmc.py │ │ │ ├── infinitebench_enqa.py │ │ │ ├── infinitebench_ensum.py │ │ │ ├── infinitebench_mathcalc.py │ │ │ ├── infinitebench_mathfind.py │ │ │ ├── infinitebench_retrievekv.py │ │ │ ├── infinitebench_retrievenumber.py │ │ │ ├── infinitebench_retrievepasskey.py │ │ │ ├── infinitebench_zhqa.py │ │ │ └── utils.py │ │ ├── internsandbox.py │ │ ├── iwslt2017.py │ │ ├── jigsawmultilingual.py │ │ ├── jsonl.py │ │ ├── judge │ │ │ ├── __init__.py │ │ │ ├── judgebench.py │ │ │ ├── judgerbenchv2.py │ │ │ ├── rewardbench.py │ │ │ └── rmb.py │ │ ├── kaoshi.py │ │ ├── kcle.py │ │ ├── korbench │ │ │ ├── __init__.py │ │ │ ├── korbench.py │ │ │ ├── korbench_dataset_config │ │ │ │ ├── __init__.py │ │ │ │ ├── config.yaml │ │ │ │ ├── config_wrapper.py │ │ │ │ └── prompt │ │ │ │ │ ├── 0_shot.yaml │ │ │ │ │ ├── 3_shot.yaml │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── mixed.yaml │ │ │ │ │ ├── self-correction.yaml │ │ │ │ │ └── trick.yaml │ │ │ └── korbench_utils.py │ │ ├── lambada.py │ │ ├── lawbench │ │ │ ├── __init__.py │ │ │ ├── evaluation_functions │ │ │ │ ├── __init__.py │ │ │ │ ├── cjft.py │ │ │ │ ├── flzx.py │ │ │ │ ├── ftcs.py │ │ │ │ ├── jdzy.py │ │ │ │ ├── jec_ac.py │ │ │ │ ├── jec_kd.py │ │ │ │ ├── jetq.py │ │ │ │ ├── lblj.py │ │ │ │ ├── ljp_accusation.py │ │ │ │ ├── ljp_article.py │ │ │ │ ├── ljp_imprison.py │ │ │ │ ├── sjjc.py │ │ │ │ ├── wbfl.py │ │ │ │ ├── wsjd.py │ │ │ │ ├── xxcq.py │ │ │ │ ├── ydlj.py │ │ │ │ ├── yqzy.py │ │ │ │ └── zxfl.py │ │ │ ├── lawbench.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── char_smi.py │ │ │ │ ├── compare_m2_for_evaluation.py │ │ │ │ ├── comprehension_scores.py │ │ │ │ ├── function_utils.py │ │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── alignment.py │ │ │ │ ├── annotator.py │ │ │ │ ├── classifier.py │ │ │ │ ├── merger.py │ │ │ │ ├── tokenization.py │ │ │ │ └── tokenizer.py │ │ │ │ ├── parallel_to_m2.py │ │ │ │ └── rc_f1.py │ │ ├── lcsts.py │ │ ├── leval │ │ │ ├── __init__.py │ │ │ ├── evaluators.py │ │ │ ├── leval_coursera.py │ │ │ ├── leval_financial_qa.py │ │ │ ├── leval_gov_report_summ.py │ │ │ ├── leval_gsm100.py │ │ │ ├── leval_legal_contract_qa.py │ │ │ ├── leval_meeting_summ.py │ │ │ ├── leval_multidoc_qa.py │ │ │ ├── leval_narrattive_qa.py │ │ │ ├── leval_natural_question.py │ │ │ ├── leval_news_summ.py │ │ │ ├── leval_paper_assistant.py │ │ │ ├── leval_patent_summ.py │ │ │ ├── leval_quality.py │ │ │ ├── leval_review_summ.py │ │ │ ├── leval_scientific_qa.py │ │ │ ├── leval_topic_retrieval.py │ │ │ ├── leval_tpo.py │ │ │ └── leval_tvshow_summ.py │ │ ├── livecodebench │ │ │ ├── __init__.py │ │ │ ├── evaluator.py │ │ │ ├── execute_utils.py │ │ │ ├── extract_utils.py │ │ │ ├── livecodebench.py │ │ │ ├── pass_k_utils.py │ │ │ ├── prompts.py │ │ │ └── testing_util.py │ │ ├── livemathbench │ │ │ ├── __init__.py │ │ │ ├── livemathbench.py │ │ │ ├── prompts.py │ │ │ └── utils.py │ │ ├── livereasonbench │ │ │ ├── __init__.py │ │ │ └── livereasonbench.py │ │ ├── livestembench.py │ │ ├── llm_compression.py │ │ ├── lmeval.py │ │ ├── longbench │ │ │ ├── __init__.py │ │ │ ├── evaluators.py │ │ │ ├── longbench_2wikim_qa.py │ │ │ ├── longbench_dureader.py │ │ │ ├── longbench_gov_report.py │ │ │ ├── longbench_hotpot_qa.py │ │ │ ├── longbench_lcc.py │ │ │ ├── longbench_lsht.py │ │ │ ├── longbench_multi_news.py │ │ │ ├── longbench_multifieldqa_en.py │ │ │ ├── longbench_multifieldqa_zh.py │ │ │ ├── longbench_musique.py │ │ │ ├── longbench_narrative_qa.py │ │ │ ├── longbench_passage_count.py │ │ │ ├── longbench_passage_retrieval_en.py │ │ │ ├── longbench_passage_retrieval_zh.py │ │ │ ├── longbench_qasper.py │ │ │ ├── longbench_qmsum.py │ │ │ ├── longbench_repobench.py │ │ │ ├── longbench_samsum.py │ │ │ ├── longbench_trec.py │ │ │ ├── longbench_trivia_qa.py │ │ │ └── longbench_vcsum.py │ │ ├── longbenchv2.py │ │ ├── lveval │ │ │ ├── __init__.py │ │ │ ├── evaluators.py │ │ │ ├── lveval_cmrc_mixup.py │ │ │ ├── lveval_dureader_mixup.py │ │ │ ├── lveval_factrecall_en.py │ │ │ ├── lveval_factrecall_zh.py │ │ │ ├── lveval_hotpotwikiqa_mixup.py │ │ │ ├── lveval_lic_mixup.py │ │ │ ├── lveval_loogle_CR_mixup.py │ │ │ ├── lveval_loogle_MIR_mixup.py │ │ │ ├── lveval_loogle_SD_mixup.py │ │ │ ├── lveval_multifieldqa_en_mixup.py │ │ │ └── lveval_multifieldqa_zh_mixup.py │ │ ├── mastermath2024v1.py │ │ ├── matbench │ │ │ ├── __init__.py │ │ │ ├── matbench.py │ │ │ └── post_process.py │ │ ├── math.py │ │ ├── math401.py │ │ ├── math_intern.py │ │ ├── mathbench.py │ │ ├── mbpp.py │ │ ├── mbpp_pro.py │ │ ├── medbench │ │ │ ├── __init__.py │ │ │ ├── constructions.py │ │ │ ├── dataset_loader.py │ │ │ ├── evaluation.py │ │ │ ├── math_equivalence.py │ │ │ ├── medbench.py │ │ │ ├── post_process.py │ │ │ └── utils.py │ │ ├── medmcqa.py │ │ ├── mgsm.py │ │ ├── mmlu.py │ │ ├── mmlu_cf.py │ │ ├── mmlu_pro.py │ │ ├── mmmlu.py │ │ ├── multipl_e.py │ │ ├── multirc.py │ │ ├── musr │ │ │ ├── __init__.py │ │ │ ├── murder_mystery_solved_ex.py │ │ │ ├── musr.py │ │ │ ├── object_placements_solved_ex.py │ │ │ ├── team_allocation_solved_ex.py │ │ │ └── tree.py │ │ ├── narrativeqa.py │ │ ├── natural_question.py │ │ ├── natural_question_cn.py │ │ ├── needlebench │ │ │ ├── __init__.py │ │ │ ├── atc.py │ │ │ ├── atc_choice.py │ │ │ ├── multi.py │ │ │ ├── origin.py │ │ │ └── parallel.py │ │ ├── needlebench_v2 │ │ │ ├── __init__.py │ │ │ ├── atc.py │ │ │ ├── atc_elder_only.py │ │ │ ├── multi.py │ │ │ ├── origin.py │ │ │ └── parallel.py │ │ ├── nejmaibench.py │ │ ├── obqa.py │ │ ├── olymmath.py │ │ ├── omni_math.py │ │ ├── phybench │ │ │ ├── EED.py │ │ │ ├── __init__.py │ │ │ ├── box_extract.py │ │ │ ├── extended_zss.py │ │ │ ├── latex_pre_process.py │ │ │ └── phybench.py │ │ ├── physics.py │ │ ├── piqa.py │ │ ├── py150.py │ │ ├── qasper.py │ │ ├── qaspercut.py │ │ ├── race.py │ │ ├── rbench.py │ │ ├── realtoxicprompts.py │ │ ├── reasonbench │ │ │ ├── ReasonBenchDataset.py │ │ │ └── __init__.py │ │ ├── record.py │ │ ├── rolebench.py │ │ ├── ruler │ │ │ ├── __init__.py │ │ │ ├── ruler_cwe.py │ │ │ ├── ruler_fwe.py │ │ │ ├── ruler_niah.py │ │ │ ├── ruler_qa.py │ │ │ └── ruler_vt.py │ │ ├── s3eval.py │ │ ├── safety.py │ │ ├── sage │ │ │ ├── dataset_loader.py │ │ │ ├── evaluation.py │ │ │ └── prompt.py │ │ ├── scibench.py │ │ ├── scicode.py │ │ ├── simpleqa.py │ │ ├── siqa.py │ │ ├── smolinstruct.py │ │ ├── squad20.py │ │ ├── srbench.py │ │ ├── storycloze.py │ │ ├── strategyqa.py │ │ ├── subjective │ │ │ ├── __init__.py │ │ │ ├── alignbench.py │ │ │ ├── alpacaeval.py │ │ │ ├── arena_hard.py │ │ │ ├── commonbench.py │ │ │ ├── compass_arena.py │ │ │ ├── compass_arena_subjective_bench.py │ │ │ ├── compassbench.py │ │ │ ├── compassbench_checklist.py │ │ │ ├── compassbench_control_length_bias.py │ │ │ ├── corev2.py │ │ │ ├── creationbench.py │ │ │ ├── flames.py │ │ │ ├── fofo.py │ │ │ ├── followbench.py │ │ │ ├── hellobench.py │ │ │ ├── judgerbench.py │ │ │ ├── mtbench.py │ │ │ ├── mtbench101.py │ │ │ ├── multiround.py │ │ │ ├── subjective_cmp.py │ │ │ ├── utils.py │ │ │ ├── wildbench.py │ │ │ └── writingbench.py │ │ ├── summedits.py │ │ ├── summscreen.py │ │ ├── supergpqa │ │ │ ├── __init__.py │ │ │ ├── supergpqa.py │ │ │ ├── supergpqa_dataset_config │ │ │ │ ├── config_default.yaml │ │ │ │ ├── config_reasoning_models.yaml │ │ │ │ ├── config_wrapper.py │ │ │ │ └── prompt │ │ │ │ │ ├── five-shot.yaml │ │ │ │ │ ├── robustness-exp.yaml │ │ │ │ │ ├── zero-shot-with-subfield.yaml │ │ │ │ │ └── zero-shot.yaml │ │ │ ├── supergpqa_eval.py │ │ │ └── supergpqa_utils.py │ │ ├── svamp.py │ │ ├── tabmwp.py │ │ ├── taco.py │ │ ├── teval │ │ │ ├── __init__.py │ │ │ ├── evaluators │ │ │ │ ├── __init__.py │ │ │ │ ├── instruct_evaluator.py │ │ │ │ ├── planning_evaluator.py │ │ │ │ ├── reason_retrieve_understand_evaluator.py │ │ │ │ └── review_evaluator.py │ │ │ ├── schema.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── convert_results.py │ │ │ │ ├── format_load.py │ │ │ │ ├── meta_template.py │ │ │ │ └── template.py │ │ ├── tnews.py │ │ ├── triviaqa.py │ │ ├── triviaqarc.py │ │ ├── truthfulqa.py │ │ ├── tydiqa.py │ │ ├── wic.py │ │ ├── wikibench.py │ │ ├── winograd.py │ │ ├── winogrande.py │ │ ├── wnli.py │ │ ├── wsc.py │ │ ├── xcopa.py │ │ ├── xiezhi.py │ │ ├── xlsum.py │ │ └── xsum.py │ ├── evaluator │ │ ├── __init__.py │ │ ├── cascade_evaluator.py │ │ ├── generic_llm_evaluator.py │ │ └── math_evaluator.py │ ├── lagent │ │ ├── actions │ │ │ ├── ipython_interpreter.py │ │ │ └── python_interpreter.py │ │ └── agents │ │ │ └── react.py │ ├── metrics │ │ ├── __init__.py │ │ ├── dump_results.py │ │ ├── mme_score.py │ │ └── seedbench.py │ ├── models │ │ ├── __init__.py │ │ ├── accessory.py │ │ ├── ai360_api.py │ │ ├── alaya.py │ │ ├── baichuan_api.py │ │ ├── baidu_api.py │ │ ├── bailing_api_oc.py │ │ ├── base.py │ │ ├── base_api.py │ │ ├── bluelm_api.py │ │ ├── bytedance_api.py │ │ ├── claude_allesapin.py │ │ ├── claude_api │ │ │ ├── __init__.py │ │ │ ├── claude_api.py │ │ │ └── postprocessors.py │ │ ├── claude_sdk_api.py │ │ ├── deepseek_api.py │ │ ├── doubao.py │ │ ├── doubao_api.py │ │ ├── gemini_api.py │ │ ├── glm.py │ │ ├── huggingface.py │ │ ├── huggingface_above_v4_33.py │ │ ├── hunyuan_api.py │ │ ├── intern_model.py │ │ ├── interntrain.py │ │ ├── krgpt_api.py │ │ ├── lagent.py │ │ ├── langchain.py │ │ ├── lightllm_api.py │ │ ├── llama2.py │ │ ├── lmdeploy_with_tf_above_v4_33.py │ │ ├── minimax_api.py │ │ ├── mistral_api.py │ │ ├── mixtral.py │ │ ├── modelscope.py │ │ ├── moonshot_api.py │ │ ├── nanbeige_api.py │ │ ├── openai_api.py │ │ ├── openai_streaming.py │ │ ├── pangu_api.py │ │ ├── qwen_api.py │ │ ├── rendu_api.py │ │ ├── sensetime_api.py │ │ ├── stepfun_api.py │ │ ├── turbomind.py │ │ ├── turbomind_api.py │ │ ├── turbomind_with_tf_above_v4_33.py │ │ ├── unigpt_api.py │ │ ├── vllm.py │ │ ├── vllm_with_tf_above_v4_33.py │ │ ├── xunfei_api.py │ │ ├── yayi_api.py │ │ ├── yi_api.py │ │ ├── zhipuai_api.py │ │ └── zhipuai_v2_api.py │ ├── openicl │ │ ├── __init__.py │ │ ├── icl_dataset_reader.py │ │ ├── icl_evaluator │ │ │ ├── __init__.py │ │ │ ├── code_evaluator.py │ │ │ ├── hf_metrics │ │ │ │ ├── accuracy.py │ │ │ │ ├── rouge.py │ │ │ │ ├── sacrebleu.py │ │ │ │ └── squad.py │ │ │ ├── icl_agent_evaluator.py │ │ │ ├── icl_aucroc_evaluator.py │ │ │ ├── icl_base_evaluator.py │ │ │ ├── icl_bpc_evaluator.py │ │ │ ├── icl_circular_evaluator.py │ │ │ ├── icl_em_evaluator.py │ │ │ ├── icl_hf_evaluator.py │ │ │ ├── icl_jieba_rouge_evaluator.py │ │ │ ├── icl_judge_evaluator.py │ │ │ ├── icl_korbench_evaluator.py │ │ │ ├── icl_misc_evaluator.py │ │ │ ├── icl_plugin_evaluator.py │ │ │ ├── icl_toxic_evaluator.py │ │ │ └── lm_evaluator.py │ │ ├── icl_inferencer │ │ │ ├── __init__.py │ │ │ ├── icl_agent_inferencer.py │ │ │ ├── icl_attack_inferencer.py │ │ │ ├── icl_base_inferencer.py │ │ │ ├── icl_chat_inferencer.py │ │ │ ├── icl_clp_inferencer.py │ │ │ ├── icl_gen_inferencer.py │ │ │ ├── icl_inference_ppl_only_inferencer.py │ │ │ ├── icl_ll_inferencer.py │ │ │ ├── icl_mink_percent_inferencer.py │ │ │ ├── icl_ppl_inferencer.py │ │ │ ├── icl_ppl_only_inferencer.py │ │ │ ├── icl_sc_inferencer.py │ │ │ ├── icl_sw_ce_loss_inferencer.py │ │ │ └── icl_tot_inferencer.py │ │ ├── icl_prompt_template.py │ │ ├── icl_retriever │ │ │ ├── __init__.py │ │ │ ├── icl_base_retriever.py │ │ │ ├── icl_bm25_retriever.py │ │ │ ├── icl_dpp_retriever.py │ │ │ ├── icl_fix_k_retriever.py │ │ │ ├── icl_mdl_retriever.py │ │ │ ├── icl_random_retriever.py │ │ │ ├── icl_sliding_k_retriever.py │ │ │ ├── icl_topk_retriever.py │ │ │ ├── icl_votek_retriever.py │ │ │ └── icl_zero_retriever.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── logging.py │ ├── partitioners │ │ ├── __init__.py │ │ ├── base.py │ │ ├── naive.py │ │ ├── num_worker.py │ │ ├── size.py │ │ ├── sub_naive.py │ │ ├── sub_num_worker.py │ │ └── sub_size.py │ ├── registry.py │ ├── runners │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dlc.py │ │ ├── local.py │ │ ├── local_api.py │ │ ├── rjob.py │ │ ├── slurm.py │ │ ├── slurm_sequential.py │ │ └── volc.py │ ├── summarizers │ │ ├── __init__.py │ │ ├── circular.py │ │ ├── default.py │ │ ├── default_subjective.py │ │ ├── llm_compression.py │ │ ├── multi_faceted.py │ │ ├── multi_model.py │ │ ├── needlebench.py │ │ ├── subjective │ │ │ ├── __init__.py │ │ │ ├── alignmentbench.py │ │ │ ├── all_obj.py │ │ │ ├── alpacaeval.py │ │ │ ├── arenahard.py │ │ │ ├── charm.py │ │ │ ├── common_summarizer.py │ │ │ ├── compass_arena.py │ │ │ ├── compass_arena_bradley_terry.py │ │ │ ├── compassbench.py │ │ │ ├── compassbench_v13.py │ │ │ ├── corev2.py │ │ │ ├── creationbench.py │ │ │ ├── flames.py │ │ │ ├── fofo.py │ │ │ ├── followbench.py │ │ │ ├── mtbench.py │ │ │ ├── mtbench101.py │ │ │ ├── multiround.py │ │ │ ├── qacompassbench.py │ │ │ ├── subjective.py │ │ │ ├── subjective_post_process.py │ │ │ ├── utils.py │ │ │ └── wildbench.py │ │ └── summarizer_pretrain.py │ ├── tasks │ │ ├── __init__.py │ │ ├── base.py │ │ ├── llm_eval.py │ │ ├── openicl_attack.py │ │ ├── openicl_eval.py │ │ ├── openicl_infer.py │ │ ├── outer_eval │ │ │ └── alpacaeval.py │ │ └── subjective_eval.py │ └── utils │ │ ├── __init__.py │ │ ├── abbr.py │ │ ├── auxiliary.py │ │ ├── build.py │ │ ├── collect_env.py │ │ ├── datasets.py │ │ ├── datasets_info.py │ │ ├── dependency.py │ │ ├── dict_postprocessors.py │ │ ├── file.py │ │ ├── fileio.py │ │ ├── lark.py │ │ ├── logging.py │ │ ├── menu.py │ │ ├── network.py │ │ ├── prompt.py │ │ ├── result_station.py │ │ ├── run.py │ │ ├── text_postprocessors.py │ │ └── types.py │ ├── run.py │ ├── setup.py │ ├── tests │ ├── dataset │ │ ├── test_humaneval.py │ │ ├── test_local_datasets.py │ │ └── test_ms_datasets.py │ ├── openicl │ │ └── test_prompt_template.py │ └── prompt │ │ ├── test_api_template_parser.py │ │ ├── test_lm_template_parser.py │ │ └── test_prompt_list.py │ └── tools │ ├── case_analyzer.py │ ├── collect_code_preds.py │ ├── compare_configs.py │ ├── convert_alignmentbench.py │ ├── list_configs.py │ ├── prediction_merger.py │ ├── prompt_viewer.py │ ├── test_api_model.py │ ├── update_dataset_suffix.py │ └── viz_multi_model.py ├── generate.py └── training ├── README.md ├── llama_factory_sdar ├── .dockerignore ├── .env.local ├── .gitattributes ├── .gitignore ├── .pre-commit-config.yaml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── README_zh.md ├── assets │ ├── alaya_new.svg │ ├── logo.png │ ├── serpapi.svg │ ├── warp.jpg │ ├── wechat.jpg │ ├── wechat_alaya.png │ └── wechat_npu.jpg ├── data │ ├── README.md │ ├── README_zh.md │ ├── alpaca_en_demo.json │ ├── alpaca_zh_demo.json │ ├── belle_multiturn │ │ └── belle_multiturn.py │ ├── c4_demo.jsonl │ ├── dataset_info.json │ ├── dpo_en_demo.json │ ├── dpo_zh_demo.json │ ├── glaive_toolcall_en_demo.json │ ├── glaive_toolcall_zh_demo.json │ ├── hh_rlhf_en │ │ └── hh_rlhf_en.py │ ├── identity.json │ ├── kto_en_demo.json │ ├── mllm_audio_demo.json │ ├── mllm_demo.json │ ├── mllm_demo_data │ │ ├── 1.jpg │ │ ├── 1.mp3 │ │ ├── 1.mp4 │ │ ├── 2.avi │ │ ├── 2.jpg │ │ ├── 2.wav │ │ ├── 3.flac │ │ ├── 3.jpg │ │ ├── 3.mp4 │ │ ├── 4.mp3 │ │ └── 4.mp4 │ ├── mllm_video_audio_demo.json │ ├── mllm_video_demo.json │ ├── ultra_chat │ │ └── ultra_chat.py │ └── wiki_demo.txt ├── docker │ ├── docker-cuda │ │ ├── Dockerfile │ │ ├── Dockerfile.base │ │ ├── README.md │ │ └── docker-compose.yml │ ├── docker-npu │ │ ├── Dockerfile │ │ └── docker-compose.yml │ └── docker-rocm │ │ ├── Dockerfile │ │ └── docker-compose.yml ├── evaluation │ ├── ceval │ │ ├── ceval.py │ │ ├── ceval.zip │ │ └── mapping.json │ ├── cmmlu │ │ ├── cmmlu.py │ │ ├── cmmlu.zip │ │ └── mapping.json │ └── mmlu │ │ ├── mapping.json │ │ ├── mmlu.py │ │ └── mmlu.zip ├── examples │ ├── README.md │ ├── README_zh.md │ ├── accelerate │ │ ├── fsdp_config.yaml │ │ └── fsdp_config_offload.yaml │ ├── deepspeed │ │ ├── ds_z0_config.json │ │ ├── ds_z2_config.json │ │ ├── ds_z2_offload_config.json │ │ ├── ds_z3_config.json │ │ └── ds_z3_offload_config.json │ ├── extras │ │ ├── adam_mini │ │ │ └── qwen2_full_sft.yaml │ │ ├── apollo │ │ │ └── llama3_full_sft.yaml │ │ ├── badam │ │ │ └── llama3_full_sft.yaml │ │ ├── dft │ │ │ └── qwen2_full_sft.yaml │ │ ├── fsdp_qlora │ │ │ ├── llama3_lora_sft.yaml │ │ │ └── train.sh │ │ ├── galore │ │ │ └── llama3_full_sft.yaml │ │ ├── llama_pro │ │ │ ├── expand.sh │ │ │ └── llama3_freeze_sft.yaml │ │ ├── loraplus │ │ │ └── llama3_lora_sft.yaml │ │ ├── mod │ │ │ └── llama3_full_sft.yaml │ │ ├── muon │ │ │ └── qwen2_full_sft.yaml │ │ ├── nlg_eval │ │ │ └── llama3_lora_predict.yaml │ │ ├── oft │ │ │ ├── llama3_oft_sft.yaml │ │ │ └── qwen2_5vl_oft_sft.yaml │ │ ├── pissa │ │ │ ├── init.sh │ │ │ └── llama3_lora_sft.yaml │ │ └── qoft │ │ │ ├── llama3_oft_sft_awq.yaml │ │ │ ├── llama3_oft_sft_bnb_npu.yaml │ │ │ └── llama3_oft_sft_gptq.yaml │ ├── inference │ │ ├── llama3.yaml │ │ ├── llama3_full_sft.yaml │ │ ├── llama3_lora_sft.yaml │ │ └── qwen2_5vl.yaml │ ├── merge_lora │ │ ├── llama3_full_sft.yaml │ │ ├── llama3_gptq.yaml │ │ ├── llama3_lora_sft.yaml │ │ └── qwen2_5vl_lora_sft.yaml │ ├── train_full_sdar │ │ └── sdar_4b │ │ │ ├── sdar_4b_math_cot_full.yaml │ │ │ └── sdar_8b_math_cot_full.yaml │ ├── train_lora │ │ ├── gpt_lora_sft.yaml │ │ ├── llama3_lora_dpo.yaml │ │ ├── llama3_lora_eval.yaml │ │ ├── llama3_lora_kto.yaml │ │ ├── llama3_lora_ppo.yaml │ │ ├── llama3_lora_pretrain.yaml │ │ ├── llama3_lora_reward.yaml │ │ ├── llama3_lora_sft.sh │ │ ├── llama3_lora_sft.yaml │ │ ├── llama3_lora_sft_ds3.yaml │ │ ├── llama3_lora_sft_ray.yaml │ │ ├── llama3_preprocess.yaml │ │ ├── llama4_lora_sft_ds3.yaml │ │ ├── qwen2_5vl_lora_dpo.yaml │ │ └── qwen2_5vl_lora_sft.yaml │ └── train_qlora │ │ ├── llama3_lora_sft_aqlm.yaml │ │ ├── llama3_lora_sft_awq.yaml │ │ ├── llama3_lora_sft_bnb_npu.yaml │ │ ├── llama3_lora_sft_gptq.yaml │ │ └── llama3_lora_sft_otfq.yaml ├── pyproject.toml ├── requirements.txt ├── run_local.sh ├── scripts │ ├── api_example │ │ ├── test_image.py │ │ └── test_toolcall.py │ ├── convert_ckpt │ │ ├── llamafy_baichuan2.py │ │ ├── llamafy_qwen.py │ │ └── tiny_llama4.py │ ├── eval_bleu_rouge.py │ ├── llama_pro.py │ ├── loftq_init.py │ ├── pissa_init.py │ ├── qwen_omni_merge.py │ ├── stat_utils │ │ ├── cal_flops.py │ │ ├── cal_lr.py │ │ ├── cal_mfu.py │ │ ├── cal_ppl.py │ │ └── length_cdf.py │ └── vllm_infer.py ├── setup.py ├── src │ ├── api.py │ ├── llamafactory │ │ ├── __init__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── chat.py │ │ │ ├── common.py │ │ │ └── protocol.py │ │ ├── chat │ │ │ ├── __init__.py │ │ │ ├── base_engine.py │ │ │ ├── chat_model.py │ │ │ ├── hf_engine.py │ │ │ ├── sglang_engine.py │ │ │ └── vllm_engine.py │ │ ├── cli.py │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── collator.py │ │ │ ├── converter.py │ │ │ ├── data_utils.py │ │ │ ├── formatter.py │ │ │ ├── loader.py │ │ │ ├── mm_plugin.py │ │ │ ├── parser.py │ │ │ ├── processor │ │ │ │ ├── __init__.py │ │ │ │ ├── feedback.py │ │ │ │ ├── pairwise.py │ │ │ │ ├── pretrain.py │ │ │ │ ├── processor_utils.py │ │ │ │ ├── supervised.py │ │ │ │ └── unsupervised.py │ │ │ ├── template.py │ │ │ └── tool_utils.py │ │ ├── eval │ │ │ ├── __init__.py │ │ │ ├── evaluator.py │ │ │ └── template.py │ │ ├── extras │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── env.py │ │ │ ├── logging.py │ │ │ ├── misc.py │ │ │ ├── packages.py │ │ │ └── ploting.py │ │ ├── hparams │ │ │ ├── __init__.py │ │ │ ├── data_args.py │ │ │ ├── evaluation_args.py │ │ │ ├── finetuning_args.py │ │ │ ├── generating_args.py │ │ │ ├── model_args.py │ │ │ ├── parser.py │ │ │ └── training_args.py │ │ ├── launcher.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── adapter.py │ │ │ ├── loader.py │ │ │ ├── model_utils │ │ │ │ ├── __init__.py │ │ │ │ ├── attention.py │ │ │ │ ├── checkpointing.py │ │ │ │ ├── embedding.py │ │ │ │ ├── kv_cache.py │ │ │ │ ├── liger_kernel.py │ │ │ │ ├── longlora.py │ │ │ │ ├── misc.py │ │ │ │ ├── mod.py │ │ │ │ ├── moe.py │ │ │ │ ├── packing.py │ │ │ │ ├── quantization.py │ │ │ │ ├── rope.py │ │ │ │ ├── unsloth.py │ │ │ │ ├── valuehead.py │ │ │ │ └── visual.py │ │ │ └── patcher.py │ │ ├── third_party │ │ │ ├── __init__.py │ │ │ └── muon │ │ │ │ ├── __init__.py │ │ │ │ └── muon.py │ │ ├── train │ │ │ ├── __init__.py │ │ │ ├── callbacks.py │ │ │ ├── dpo │ │ │ │ ├── __init__.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── kto │ │ │ │ ├── __init__.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── ppo │ │ │ │ ├── __init__.py │ │ │ │ ├── ppo_utils.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── pt │ │ │ │ ├── __init__.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── rm │ │ │ │ ├── __init__.py │ │ │ │ ├── metric.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── sft │ │ │ │ ├── __init__.py │ │ │ │ ├── metric.py │ │ │ │ ├── trainer.py │ │ │ │ └── workflow.py │ │ │ ├── test_utils.py │ │ │ ├── trainer_utils.py │ │ │ └── tuner.py │ │ └── webui │ │ │ ├── __init__.py │ │ │ ├── chatter.py │ │ │ ├── common.py │ │ │ ├── components │ │ │ ├── __init__.py │ │ │ ├── chatbot.py │ │ │ ├── data.py │ │ │ ├── eval.py │ │ │ ├── export.py │ │ │ ├── footer.py │ │ │ ├── infer.py │ │ │ ├── top.py │ │ │ └── train.py │ │ │ ├── control.py │ │ │ ├── css.py │ │ │ ├── engine.py │ │ │ ├── interface.py │ │ │ ├── locales.py │ │ │ ├── manager.py │ │ │ └── runner.py │ ├── train.py │ └── webui.py └── tests │ ├── check_license.py │ ├── data │ ├── processor │ │ ├── test_feedback.py │ │ ├── test_pairwise.py │ │ ├── test_processor_utils.py │ │ ├── test_supervised.py │ │ └── test_unsupervised.py │ ├── test_collator.py │ ├── test_converter.py │ ├── test_formatter.py │ ├── test_loader.py │ ├── test_mm_plugin.py │ └── test_template.py │ ├── e2e │ ├── test_chat.py │ ├── test_sglang.py │ └── test_train.py │ ├── eval │ └── test_eval_template.py │ ├── model │ ├── model_utils │ │ ├── test_add_tokens.py │ │ ├── test_attention.py │ │ ├── test_checkpointing.py │ │ ├── test_misc.py │ │ ├── test_packing.py │ │ └── test_visual.py │ ├── test_base.py │ ├── test_freeze.py │ ├── test_full.py │ ├── test_lora.py │ └── test_pissa.py │ ├── train │ └── test_sft_trainer.py │ └── version.txt ├── llamafactory_full_env.yml └── model ├── SDAR-4B-Chat ├── README.md ├── added_tokens.json ├── chat_template.jinja ├── config.json ├── configuration_sdar.py ├── fused_linear_diffusion_cross_entropy.py ├── generation_config.json ├── merges.txt ├── model.safetensors.index.json ├── modeling_sdar.py ├── special_tokens_map.json ├── tokenization_qwen2.py ├── tokenization_qwen2_fast.py ├── tokenizer.json ├── tokenizer_config.json └── vocab.json └── SDAR-8B-Chat ├── README.md ├── added_tokens.json ├── chat_template.jinja ├── config copy.json ├── config.json ├── configuration_sdar.py ├── fused_linear_diffusion_cross_entropy.py ├── generation_config.json ├── merges.txt ├── model.safetensors.index.json ├── modeling_sdar.py ├── special_tokens_map.json ├── tokenization_qwen2.py ├── tokenization_qwen2_fast.py ├── tokenizer.json ├── tokenizer_config.json └── vocab.json /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/README.md -------------------------------------------------------------------------------- /assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/.DS_Store -------------------------------------------------------------------------------- /assets/Performace_and_speed.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/Performace_and_speed.svg -------------------------------------------------------------------------------- /assets/SDAR_doc_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/SDAR_doc_head.png -------------------------------------------------------------------------------- /assets/SDAR_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/SDAR_logo.png -------------------------------------------------------------------------------- /assets/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/logo.jpg -------------------------------------------------------------------------------- /assets/table1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/table1.png -------------------------------------------------------------------------------- /assets/table2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/table2.png -------------------------------------------------------------------------------- /assets/table2_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/table2_1.png -------------------------------------------------------------------------------- /assets/table3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/table3.png -------------------------------------------------------------------------------- /assets/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/assets/wechat.jpg -------------------------------------------------------------------------------- /evaluation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/README.md -------------------------------------------------------------------------------- /evaluation/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/environment.yml -------------------------------------------------------------------------------- /evaluation/opencompass/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/.gitignore -------------------------------------------------------------------------------- /evaluation/opencompass/.owners.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/.owners.yml -------------------------------------------------------------------------------- /evaluation/opencompass/.pre-commit-config-zh-cn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/.pre-commit-config-zh-cn.yaml -------------------------------------------------------------------------------- /evaluation/opencompass/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/.pre-commit-config.yaml -------------------------------------------------------------------------------- /evaluation/opencompass/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/LICENSE -------------------------------------------------------------------------------- /evaluation/opencompass/MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/MANIFEST.in -------------------------------------------------------------------------------- /evaluation/opencompass/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/README.md -------------------------------------------------------------------------------- /evaluation/opencompass/README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/README_zh-CN.md -------------------------------------------------------------------------------- /evaluation/opencompass/configs/eval_sdar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/configs/eval_sdar.py -------------------------------------------------------------------------------- /evaluation/opencompass/dataset-index.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/dataset-index.yml -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/.readthedocs.yaml -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/Makefile -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_static/css/readthedocs.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_static/css/readthedocs.css -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_static/image/logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_static/image/logo.svg -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_static/image/logo_icon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_static/image/logo_icon.svg -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_static/js/custom.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_static/js/custom.js -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_templates/404.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_templates/404.html -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_templates/autosummary/class.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_templates/autosummary/class.rst -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/_templates/callable.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/_templates/callable.rst -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/circular_eval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/circular_eval.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/code_eval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/code_eval.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/custom_dataset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/custom_dataset.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/llm_judge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/llm_judge.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/longeval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/longeval.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/math_verify.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/math_verify.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/new_dataset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/new_dataset.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/new_model.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/persistence.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/persistence.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/advanced_guides/prompt_attack.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/advanced_guides/prompt_attack.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/conf.py -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/get_started/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/get_started/faq.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/get_started/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/get_started/installation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/get_started/quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/get_started/quick_start.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/index.rst -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/notes/academic.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/notes/academic.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/notes/contribution_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/notes/contribution_guide.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/notes/news.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/notes/news.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/prompt/chain_of_thought.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/prompt/chain_of_thought.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/prompt/meta_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/prompt/meta_template.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/prompt/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/prompt/overview.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/prompt/prompt_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/prompt/prompt_template.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/statis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/statis.py -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/tools.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/config.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/corebench.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/corebench.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/datasets.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/datasets.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/deepseek_r1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/deepseek_r1.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/evaluation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/experimentation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/experimentation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/framework_overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/framework_overview.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/interns1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/interns1.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/metrics.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/models.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/en/user_guides/summarizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/en/user_guides/summarizer.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/.readthedocs.yaml -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/Makefile -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_static/css/readthedocs.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_static/css/readthedocs.css -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_static/image/logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_static/image/logo.svg -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_static/image/logo_icon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_static/image/logo_icon.svg -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_static/js/custom.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_static/js/custom.js -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_templates/404.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_templates/404.html -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/_templates/callable.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/_templates/callable.rst -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/code_eval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/code_eval.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/llm_judge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/llm_judge.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/longeval.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/longeval.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/math_verify.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/math_verify.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/new_dataset.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/new_dataset.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/new_model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/new_model.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/advanced_guides/persistence.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/advanced_guides/persistence.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/conf.py -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/cp_origin_docs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/cp_origin_docs.sh -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/docutils.conf: -------------------------------------------------------------------------------- 1 | [html writers] 2 | table_style: colwidths-auto 3 | -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/get_started/faq.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/get_started/faq.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/get_started/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/get_started/installation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/get_started/quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/get_started/quick_start.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/index.rst -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/notes/academic.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/notes/academic.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/notes/contribution_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/notes/contribution_guide.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/notes/news.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/notes/news.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/prompt/chain_of_thought.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/prompt/chain_of_thought.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/prompt/meta_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/prompt/meta_template.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/prompt/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/prompt/overview.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/prompt/prompt_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/prompt/prompt_template.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/statis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/statis.py -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/tools.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/config.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/corebench.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/corebench.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/datasets.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/datasets.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/deepseek_r1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/deepseek_r1.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/evaluation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/evaluation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/experimentation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/experimentation.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/interns1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/interns1.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/metrics.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/models.md -------------------------------------------------------------------------------- /evaluation/opencompass/docs/zh_cn/user_guides/summarizer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/docs/zh_cn/user_guides/summarizer.md -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_OlympiadBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_OlympiadBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_PMMEval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_PMMEval.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_TheoremQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_TheoremQA.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_alaya.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_alaya.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_api_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_api_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_attack.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_babilong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_babilong.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_base_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_base_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_bench_intern_s1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_bench_intern_s1.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_bluelm_32k_lveval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_bluelm_32k_lveval.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_cascade_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_cascade_evaluator.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_charm_mem.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_charm_mem.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_charm_rea.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_charm_rea.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chat_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chat_agent.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chat_agent_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chat_agent_baseline.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chat_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chat_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chat_last.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chat_last.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chembench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chembench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_chinese_simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_chinese_simpleqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_cibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_cibench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_cibench_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_cibench_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_circular.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_circular.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_claude.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_code_passk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_code_passk.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_codeagent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_codeagent.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_codebench_full.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_codebench_full.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_codegeex2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_codegeex2.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_contamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_contamination.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_deepseek_r1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_deepseek_r1.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_dingo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_dingo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_ds1000_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_ds1000_interpreter.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_edgellm_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_edgellm_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_eese_api_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_eese_api_judge.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_gpt3.5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_gpt3.5.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_gpt4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_gpt4.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_hellobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_hellobench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_hf_llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_hf_llama2.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_hf_llama_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_hf_llama_7b.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_inference_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_inference_ppl.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internLM.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internLM.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm2_chat_keyset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm2_chat_keyset.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm2_keyset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm2_keyset.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm_7b.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm_chat_turbomind.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm_chat_turbomind.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm_flames_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm_flames_chat.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm_math_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm_math_chat.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_internlm_turbomind.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_internlm_turbomind.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_judge_dataset_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_judge_dataset_all.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_judgebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_judgebench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_judgerbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_judgerbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_judgerbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_judgerbenchv2.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_korbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_korbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_lightllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_lightllm.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_livestembench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_livestembench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_llama2_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_llama2_7b.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_llama2_7b_lveval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_llama2_7b_lveval.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_llama3_instruct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_llama3_instruct.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_llm_compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_llm_compression.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_llm_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_llm_judge.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_lmdeploy_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_lmdeploy_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_longbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_longbenchv2.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_math_llm_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_math_llm_judge.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_math_llm_judge_internal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_math_llm_judge_internal.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_math_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_math_verify.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_mathbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_mathbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_mmlu_cf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_mmlu_cf.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_mmlu_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_mmlu_pro.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_modelscope_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_modelscope_datasets.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_multi_prompt_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_multi_prompt_demo.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_musr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_musr.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_needlebench_v2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_needlebench_v2.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_qwen3.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_qwen_7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_qwen_7b.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_qwen_7b_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_qwen_7b_chat.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_qwen_7b_chat_lawbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_qwen_7b_chat_lawbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_rewardbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_rewardbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_rmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_rmb.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_ruler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_ruler.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_ruler_fix_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_ruler_fix_tokenizer.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_rwkv5_3b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_rwkv5_3b.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_simpleqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_subjective.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_subjective.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_subjective_bradleyterry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_subjective_bradleyterry.py -------------------------------------------------------------------------------- /evaluation/opencompass/examples/eval_teval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/examples/eval_teval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5.0' 2 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/cli/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/cli/main.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/datasets/calm/calm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/datasets/calm/calm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/datasets/judge/rmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/datasets/judge/rmb.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/datasets/nq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/datasets/nq/README.md -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/datasets/nq/nq_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/datasets/nq/nq_gen.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/models/alaya/alaya.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/models/alaya/alaya.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/models/qwen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/models/qwen/README.md -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/models/yi/hf_yi_6b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/models/yi/hf_yi_6b.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/leval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/leval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/lveval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/lveval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/medium.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/medium.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/mmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/mmmlu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/ruler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/ruler.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/small.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/small.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/teval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/teval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/configs/summarizers/tiny.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/configs/summarizers/tiny.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/CARDBiomedBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/CARDBiomedBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ClinicBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ClinicBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/Earth_Silver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/Earth_Silver.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/FinanceIQ.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/FinanceIQ.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/GaokaoBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/GaokaoBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/IFEval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/IFEval/ifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/IFEval/ifeval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/LCBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/LCBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/MMLUArabic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/MMLUArabic.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/MedCalc_Bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/MedCalc_Bench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/MedQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/MedQA.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/MedXpertQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/MedXpertQA.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/Medbullets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/Medbullets.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/NPHardEval/p_BSP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/NPHardEval/p_BSP.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/NPHardEval/p_EDP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/NPHardEval/p_EDP.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/NPHardEval/p_SPP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/NPHardEval/p_SPP.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/NPHardEval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/NPHardEval/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/OlympiadBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/OlympiadBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/OpenFinData.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/OpenFinData.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/flores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/flores.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/mgsm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/mifeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/mifeval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/mlogiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/mlogiqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/mmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/mmmlu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PMMEval/xnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PMMEval/xnli.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ProteinLMBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ProteinLMBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/PubMedQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/PubMedQA.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/QuALITY.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/QuALITY.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/SciEval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/SciEval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/SciKnowEval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/SciKnowEval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ScienceQA.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ScienceQA.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/SeedBench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/SeedBench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/TheoremQA/legacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/TheoremQA/legacy.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/TheoremQA/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/TheoremQA/main.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/TheoremQA/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/TheoremQA/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/advglue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/advglue.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/afqmcd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/afqmcd.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/agieval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/agieval/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/agieval/agieval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/agieval/agieval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/agieval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/agieval/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/aime2024.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/aime2024.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/anli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/anli.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/anthropics_evals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/anthropics_evals.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/apps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/apps.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/arc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/arc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ax.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/babilong/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/babilong/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/babilong/babilong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/babilong/babilong.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/babilong/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/babilong/prompts.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/base.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/bbeh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/bbeh.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/bbh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/bbh.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/benbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/benbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/boolq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/boolq.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/bustum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/bustum.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/c3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/c3.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/calm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/calm/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/calm/calm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/calm/calm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/calm/data_processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/calm/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/calm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cb.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ceval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ceval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/charm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/charm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/chem_exam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/chem_exam.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/chembench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/chembench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/chid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/chid.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/chinese_simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/chinese_simpleqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cibench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/circular.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/circular.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/civilcomments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/civilcomments.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/climaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/climaqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/clozeTest_maxmin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/clozeTest_maxmin.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cluewsc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cluewsc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cmb.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cmmlu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cmnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cmnli.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cmo_fib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cmo_fib.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cmrc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cmrc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/codecompass/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/codecompass/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/commonsenseqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/commonsenseqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/commonsenseqa_cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/commonsenseqa_cn.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/compassbench_obj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/compassbench_obj.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/copa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/copa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/crowspairs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/crowspairs.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/crowspairs_cn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/crowspairs_cn.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/csl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/csl.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/custom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/custom.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/cvalues.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/cvalues.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/dingo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/dingo.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/drcd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/drcd.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/drop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/drop.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/drop_simple_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/drop_simple_eval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ds1000.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ds1000.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/eese/eese.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/eese/eese.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/eese/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/eese/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/eprstmt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/eprstmt.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/flores.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/flores.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/game24.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/game24.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/gaokao_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/gaokao_math.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/generic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/generic.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/govrepcrs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/govrepcrs.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/gpqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/gsm8k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/gsm8k.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/gsm_hard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/gsm_hard.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/healthbench/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/healthbench/types.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/hellaswag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/hellaswag.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/hle.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/huggingface.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/humaneval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/humaneval_multi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/humaneval_multi.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/humaneval_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/humaneval_pro.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/humanevalx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/humanevalx.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/hungarian_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/hungarian_math.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/inference_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/inference_ppl.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/internsandbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/internsandbox.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/iwslt2017.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/iwslt2017.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/jsonl.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/judge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/judge/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/judge/judgebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/judge/judgebench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/judge/rewardbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/judge/rewardbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/judge/rmb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/judge/rmb.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/kaoshi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/kaoshi.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/kcle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/kcle.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/korbench/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/korbench/korbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/korbench/korbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/korbench/korbench_dataset_config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/korbench/korbench_dataset_config/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lambada.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lambada.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lawbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lawbench/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lawbench/evaluation_functions/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lawbench/lawbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lawbench/lawbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lawbench/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lawbench/utils/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lcsts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lcsts.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/leval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/leval/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/leval/evaluators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/leval/evaluators.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/leval/leval_tpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/leval/leval_tpo.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/livestembench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/livestembench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/llm_compression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/llm_compression.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lmeval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lmeval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/longbenchv2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/longbenchv2.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lveval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lveval/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/lveval/evaluators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/lveval/evaluators.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mastermath2024v1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mastermath2024v1.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/matbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/matbench/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/matbench/matbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/matbench/matbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/math.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/math401.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/math401.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/math_intern.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/math_intern.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mathbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mathbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mbpp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mbpp.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mbpp_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mbpp_pro.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/medbench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/medbench/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/medbench/medbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/medbench/medbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/medbench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/medbench/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/medmcqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/medmcqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mgsm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mgsm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mmlu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mmlu_cf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mmlu_cf.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mmlu_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mmlu_pro.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/mmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/mmmlu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/multipl_e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/multipl_e.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/multirc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/multirc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/musr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/musr/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/musr/musr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/musr/musr.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/musr/tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/musr/tree.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/narrativeqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/narrativeqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/natural_question.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/natural_question.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/needlebench/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/needlebench/atc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/needlebench/atc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/needlebench/multi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/needlebench/multi.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/needlebench_v2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/nejmaibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/nejmaibench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/obqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/obqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/olymmath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/olymmath.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/omni_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/omni_math.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/phybench/EED.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/phybench/EED.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/phybench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/phybench/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/phybench/phybench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/phybench/phybench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/physics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/physics.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/piqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/piqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/py150.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/py150.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/qasper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/qasper.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/qaspercut.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/qaspercut.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/race.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/rbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/rbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/realtoxicprompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/realtoxicprompts.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/record.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/record.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/rolebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/rolebench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/ruler_cwe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ruler/ruler_cwe.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/ruler_fwe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ruler/ruler_fwe.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/ruler_niah.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ruler/ruler_niah.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/ruler_qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ruler/ruler_qa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/ruler/ruler_vt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/ruler/ruler_vt.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/s3eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/s3eval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/safety.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/safety.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/sage/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/sage/evaluation.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/sage/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/sage/prompt.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/scibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/scibench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/scicode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/scicode.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/simpleqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/simpleqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/siqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/siqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/smolinstruct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/smolinstruct.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/squad20.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/squad20.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/srbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/srbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/storycloze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/storycloze.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/strategyqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/strategyqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/subjective/corev2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/subjective/corev2.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/subjective/flames.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/subjective/flames.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/subjective/fofo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/subjective/fofo.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/subjective/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/subjective/utils.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/summedits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/summedits.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/summscreen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/summscreen.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/supergpqa/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/svamp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/svamp.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/tabmwp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/tabmwp.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/taco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/taco.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/teval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/teval/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/teval/schema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/teval/schema.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/teval/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/tnews.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/tnews.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/triviaqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/triviaqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/triviaqarc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/triviaqarc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/truthfulqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/truthfulqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/tydiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/tydiqa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/wic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/wic.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/wikibench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/wikibench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/winograd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/winograd.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/winogrande.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/winogrande.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/wnli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/wnli.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/wsc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/wsc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/xcopa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/xcopa.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/xiezhi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/xiezhi.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/xlsum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/xlsum.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/datasets/xsum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/datasets/xsum.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/evaluator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/evaluator/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/evaluator/math_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/evaluator/math_evaluator.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/lagent/agents/react.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/lagent/agents/react.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/metrics/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/metrics/dump_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/metrics/dump_results.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/metrics/mme_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/metrics/mme_score.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/metrics/seedbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/metrics/seedbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/accessory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/accessory.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/ai360_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/ai360_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/alaya.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/alaya.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/baichuan_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/baichuan_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/baidu_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/baidu_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/bailing_api_oc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/bailing_api_oc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/base.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/base_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/base_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/bluelm_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/bluelm_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/bytedance_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/bytedance_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/claude_allesapin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/claude_allesapin.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/claude_api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/claude_api/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/claude_sdk_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/claude_sdk_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/deepseek_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/deepseek_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/doubao.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/doubao.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/doubao_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/doubao_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/gemini_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/gemini_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/glm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/huggingface.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/hunyuan_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/hunyuan_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/intern_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/intern_model.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/interntrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/interntrain.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/krgpt_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/krgpt_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/lagent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/lagent.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/langchain.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/lightllm_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/lightllm_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/llama2.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/minimax_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/minimax_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/mistral_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/mistral_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/mixtral.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/modelscope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/modelscope.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/moonshot_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/moonshot_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/nanbeige_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/nanbeige_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/openai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/openai_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/openai_streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/openai_streaming.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/pangu_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/pangu_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/qwen_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/qwen_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/rendu_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/rendu_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/sensetime_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/sensetime_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/stepfun_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/stepfun_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/turbomind.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/turbomind.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/turbomind_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/turbomind_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/unigpt_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/unigpt_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/vllm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/xunfei_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/xunfei_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/yayi_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/yayi_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/yi_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/yi_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/zhipuai_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/zhipuai_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/models/zhipuai_v2_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/models/zhipuai_v2_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/openicl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/openicl/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/openicl/icl_dataset_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/openicl/icl_dataset_reader.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/openicl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import * # noqa 2 | -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/openicl/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/openicl/utils/logging.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/base.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/naive.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/num_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/num_worker.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/size.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/sub_naive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/sub_naive.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/partitioners/sub_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/partitioners/sub_size.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/registry.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/base.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/dlc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/dlc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/local.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/local_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/local_api.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/rjob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/rjob.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/slurm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/slurm.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/slurm_sequential.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/slurm_sequential.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/runners/volc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/runners/volc.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/circular.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/circular.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/default.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/default.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/multi_faceted.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/multi_faceted.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/multi_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/multi_model.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/summarizers/needlebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/summarizers/needlebench.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/base.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/llm_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/llm_eval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/openicl_attack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/openicl_attack.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/openicl_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/openicl_eval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/openicl_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/openicl_infer.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/tasks/subjective_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/tasks/subjective_eval.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/__init__.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/abbr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/abbr.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/auxiliary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/auxiliary.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/build.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/collect_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/collect_env.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/datasets.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/datasets_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/datasets_info.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/dependency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/dependency.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/dict_postprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/dict_postprocessors.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/file.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/fileio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/fileio.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/lark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/lark.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/logging.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/menu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/menu.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/network.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/prompt.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/result_station.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/result_station.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/run.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/text_postprocessors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/text_postprocessors.py -------------------------------------------------------------------------------- /evaluation/opencompass/opencompass/utils/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/opencompass/utils/types.py -------------------------------------------------------------------------------- /evaluation/opencompass/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/run.py -------------------------------------------------------------------------------- /evaluation/opencompass/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/setup.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/dataset/test_humaneval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/dataset/test_humaneval.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/dataset/test_local_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/dataset/test_local_datasets.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/dataset/test_ms_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/dataset/test_ms_datasets.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/openicl/test_prompt_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/openicl/test_prompt_template.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/prompt/test_api_template_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/prompt/test_api_template_parser.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/prompt/test_lm_template_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/prompt/test_lm_template_parser.py -------------------------------------------------------------------------------- /evaluation/opencompass/tests/prompt/test_prompt_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tests/prompt/test_prompt_list.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/case_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/case_analyzer.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/collect_code_preds.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/collect_code_preds.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/compare_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/compare_configs.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/convert_alignmentbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/convert_alignmentbench.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/list_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/list_configs.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/prediction_merger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/prediction_merger.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/prompt_viewer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/prompt_viewer.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/test_api_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/test_api_model.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/update_dataset_suffix.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/update_dataset_suffix.py -------------------------------------------------------------------------------- /evaluation/opencompass/tools/viz_multi_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/evaluation/opencompass/tools/viz_multi_model.py -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/generate.py -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/README.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/.dockerignore -------------------------------------------------------------------------------- /training/llama_factory_sdar/.env.local: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/.env.local -------------------------------------------------------------------------------- /training/llama_factory_sdar/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/.gitattributes -------------------------------------------------------------------------------- /training/llama_factory_sdar/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/.gitignore -------------------------------------------------------------------------------- /training/llama_factory_sdar/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/.pre-commit-config.yaml -------------------------------------------------------------------------------- /training/llama_factory_sdar/CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/CITATION.cff -------------------------------------------------------------------------------- /training/llama_factory_sdar/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/LICENSE -------------------------------------------------------------------------------- /training/llama_factory_sdar/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE requirements.txt 2 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/Makefile -------------------------------------------------------------------------------- /training/llama_factory_sdar/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/README.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/README_zh.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/alaya_new.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/alaya_new.svg -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/logo.png -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/serpapi.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/serpapi.svg -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/warp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/warp.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/wechat.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/wechat_alaya.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/wechat_alaya.png -------------------------------------------------------------------------------- /training/llama_factory_sdar/assets/wechat_npu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/assets/wechat_npu.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/README.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/README_zh.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/alpaca_en_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/alpaca_en_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/alpaca_zh_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/alpaca_zh_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/c4_demo.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/c4_demo.jsonl -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/dataset_info.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/dpo_en_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/dpo_en_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/dpo_zh_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/dpo_zh_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/glaive_toolcall_en_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/glaive_toolcall_en_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/glaive_toolcall_zh_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/glaive_toolcall_zh_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/hh_rlhf_en/hh_rlhf_en.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/hh_rlhf_en/hh_rlhf_en.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/identity.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/identity.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/kto_en_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/kto_en_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_audio_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_audio_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/1.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/1.mp3 -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/1.mp4 -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/2.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/2.avi -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/2.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/2.wav -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/3.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/3.flac -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/3.jpg -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/3.mp4 -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/4.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/4.mp3 -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_demo_data/4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_demo_data/4.mp4 -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_video_audio_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_video_audio_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/mllm_video_demo.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/mllm_video_demo.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/ultra_chat/ultra_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/ultra_chat/ultra_chat.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/data/wiki_demo.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/data/wiki_demo.txt -------------------------------------------------------------------------------- /training/llama_factory_sdar/docker/docker-cuda/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/docker/docker-cuda/Dockerfile -------------------------------------------------------------------------------- /training/llama_factory_sdar/docker/docker-cuda/Dockerfile.base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/docker/docker-cuda/Dockerfile.base -------------------------------------------------------------------------------- /training/llama_factory_sdar/docker/docker-cuda/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/docker/docker-cuda/README.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/docker/docker-npu/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/docker/docker-npu/Dockerfile -------------------------------------------------------------------------------- /training/llama_factory_sdar/docker/docker-rocm/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/docker/docker-rocm/Dockerfile -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/ceval/ceval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/ceval/ceval.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/ceval/ceval.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/ceval/ceval.zip -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/ceval/mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/ceval/mapping.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/cmmlu/cmmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/cmmlu/cmmlu.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/cmmlu/cmmlu.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/cmmlu/cmmlu.zip -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/cmmlu/mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/cmmlu/mapping.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/mmlu/mapping.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/mmlu/mapping.json -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/mmlu/mmlu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/mmlu/mmlu.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/evaluation/mmlu/mmlu.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/evaluation/mmlu/mmlu.zip -------------------------------------------------------------------------------- /training/llama_factory_sdar/examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/examples/README.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/examples/README_zh.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/examples/README_zh.md -------------------------------------------------------------------------------- /training/llama_factory_sdar/examples/extras/pissa/init.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/examples/extras/pissa/init.sh -------------------------------------------------------------------------------- /training/llama_factory_sdar/examples/inference/llama3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/examples/inference/llama3.yaml -------------------------------------------------------------------------------- /training/llama_factory_sdar/examples/inference/qwen2_5vl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/examples/inference/qwen2_5vl.yaml -------------------------------------------------------------------------------- /training/llama_factory_sdar/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/pyproject.toml -------------------------------------------------------------------------------- /training/llama_factory_sdar/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/requirements.txt -------------------------------------------------------------------------------- /training/llama_factory_sdar/run_local.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/run_local.sh -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/api_example/test_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/api_example/test_image.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/eval_bleu_rouge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/eval_bleu_rouge.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/llama_pro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/llama_pro.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/loftq_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/loftq_init.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/pissa_init.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/pissa_init.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/qwen_omni_merge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/qwen_omni_merge.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/stat_utils/cal_flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/stat_utils/cal_flops.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/stat_utils/cal_lr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/stat_utils/cal_lr.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/stat_utils/cal_mfu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/stat_utils/cal_mfu.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/stat_utils/cal_ppl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/stat_utils/cal_ppl.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/stat_utils/length_cdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/stat_utils/length_cdf.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/scripts/vllm_infer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/scripts/vllm_infer.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/setup.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/api.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/__init__.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/api/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/api/app.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/api/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/api/chat.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/api/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/api/common.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/api/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/api/protocol.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/chat/__init__.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/chat/hf_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/chat/hf_engine.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/cli.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/__init__.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/collator.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/converter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/formatter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/loader.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/mm_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/mm_plugin.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/parser.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/data/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/data/template.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/eval/evaluator.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/eval/template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/eval/template.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/extras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/extras/env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/extras/env.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/extras/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/extras/logging.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/extras/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/extras/misc.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/extras/ploting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/extras/ploting.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/hparams/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/hparams/parser.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/launcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/launcher.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/model/__init__.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/model/adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/model/adapter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/model/loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/model/loader.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/model/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/model/patcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/model/patcher.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/third_party/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/train/tuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/train/tuner.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/chatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/chatter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/common.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/control.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/control.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/css.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/css.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/engine.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/locales.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/locales.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/manager.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/llamafactory/webui/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/llamafactory/webui/runner.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/train.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/src/webui.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/src/webui.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/check_license.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/check_license.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_collator.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_converter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_formatter.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_loader.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_mm_plugin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_mm_plugin.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/data/test_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/data/test_template.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/e2e/test_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/e2e/test_chat.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/e2e/test_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/e2e/test_sglang.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/e2e/test_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/e2e/test_train.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/eval/test_eval_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/eval/test_eval_template.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/model/test_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/model/test_base.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/model/test_freeze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/model/test_freeze.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/model/test_full.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/model/test_full.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/model/test_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/model/test_lora.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/model/test_pissa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/model/test_pissa.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/train/test_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/train/test_sft_trainer.py -------------------------------------------------------------------------------- /training/llama_factory_sdar/tests/version.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llama_factory_sdar/tests/version.txt -------------------------------------------------------------------------------- /training/llamafactory_full_env.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/llamafactory_full_env.yml -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/README.md -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/added_tokens.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/added_tokens.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/chat_template.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/chat_template.jinja -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/config.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/configuration_sdar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/configuration_sdar.py -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/generation_config.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/merges.txt -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/model.safetensors.index.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/model.safetensors.index.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/modeling_sdar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/modeling_sdar.py -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/special_tokens_map.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/tokenization_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/tokenization_qwen2.py -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/tokenization_qwen2_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/tokenization_qwen2_fast.py -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/tokenizer.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/tokenizer_config.json -------------------------------------------------------------------------------- /training/model/SDAR-4B-Chat/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-4B-Chat/vocab.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/README.md -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/added_tokens.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/added_tokens.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/chat_template.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/chat_template.jinja -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/config copy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/config copy.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/config.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/configuration_sdar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/configuration_sdar.py -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/generation_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/generation_config.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/merges.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/merges.txt -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/model.safetensors.index.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/model.safetensors.index.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/modeling_sdar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/modeling_sdar.py -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/special_tokens_map.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/tokenization_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/tokenization_qwen2.py -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/tokenization_qwen2_fast.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/tokenization_qwen2_fast.py -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/tokenizer.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/tokenizer_config.json -------------------------------------------------------------------------------- /training/model/SDAR-8B-Chat/vocab.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JetAstra/SDAR/HEAD/training/model/SDAR-8B-Chat/vocab.json --------------------------------------------------------------------------------