├── .gitignore ├── LICENSE ├── README.md ├── data ├── add_aime.py ├── collect_data.py ├── construct_limo.py ├── construct_long_safety_dataset.py ├── construct_s1k.py ├── construct_safety_dataset.py ├── decontaminate_util.py └── fix_gpqa.py ├── deepspeed_zero3.yaml ├── eval ├── commands.sh ├── compute_sample_stats.py ├── generate.py ├── lm-evaluation-harness │ ├── .coveragerc │ ├── .flake8 │ ├── .github │ │ └── workflows │ │ │ ├── new_tasks.yml │ │ │ ├── publish.yml │ │ │ └── unit_tests.yml │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── CITATION.bib │ ├── CODEOWNERS │ ├── LICENSE.md │ ├── README.md │ ├── docs │ │ ├── API_guide.md │ │ ├── CONTRIBUTING.md │ │ ├── README.md │ │ ├── decontamination.md │ │ ├── img │ │ │ └── fewshot_example_gpt3.png │ │ ├── interface.md │ │ ├── model_guide.md │ │ ├── new_task_guide.md │ │ └── task_guide.md │ ├── dummy │ │ ├── samples_aime24_figures_2025-02-10T01-10-48.366759.jsonl │ │ ├── samples_aime24_figures_2025-02-10T01-12-57.975157.jsonl │ │ ├── samples_aime24_nofigures_2025-02-10T01-10-48.366759.jsonl │ │ ├── samples_aime24_nofigures_2025-02-10T01-12-57.975157.jsonl │ │ ├── samples_gpqa_diamond_openai_2025-02-10T01-10-48.366759.jsonl │ │ └── samples_gpqa_diamond_openai_2025-02-10T01-12-57.975157.jsonl │ ├── examples │ │ ├── lm-eval-overview.ipynb │ │ ├── visualize-wandb.ipynb │ │ └── visualize-zeno.ipynb │ ├── ignore.txt │ ├── lm_eval │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── filter.py │ │ │ ├── group.py │ │ │ ├── instance.py │ │ │ ├── metrics.py │ │ │ ├── model.py │ │ │ ├── registry.py │ │ │ ├── samplers.py │ │ │ └── task.py │ │ ├── caching │ │ │ ├── __init__.py │ │ │ └── cache.py │ │ ├── decontamination │ │ │ ├── __init__.py │ │ │ ├── archiver.py │ │ │ ├── decontaminate.py │ │ │ └── janitor.py │ │ ├── evaluator.py │ │ ├── evaluator_utils.py │ │ ├── filters │ │ │ ├── __init__.py │ │ │ ├── decontamination.py │ │ │ ├── extraction.py │ │ │ ├── selection.py │ │ │ └── transformation.py │ │ ├── loggers │ │ │ ├── __init__.py │ │ │ ├── evaluation_tracker.py │ │ │ ├── utils.py │ │ │ └── wandb_logger.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── anthropic_llms.py │ │ │ ├── api_models.py │ │ │ ├── dummy.py │ │ │ ├── gemini.py │ │ │ ├── gguf.py │ │ │ ├── hf_vlms.py │ │ │ ├── huggingface.py │ │ │ ├── mamba_lm.py │ │ │ ├── nemo_lm.py │ │ │ ├── neuralmagic.py │ │ │ ├── neuron_optimum.py │ │ │ ├── openai_completions.py │ │ │ ├── optimum_lm.py │ │ │ ├── sglang.py │ │ │ ├── textsynth.py │ │ │ ├── utils.py │ │ │ ├── vllm_causallms.py │ │ │ └── vllm_vlms.py │ │ ├── prompts │ │ │ └── __init__.py │ │ ├── tasks │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── aclue │ │ │ │ ├── README.md │ │ │ │ ├── _aclue.yaml │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── aclue_ancient_chinese_culture.yaml │ │ │ │ ├── aclue_ancient_literature.yaml │ │ │ │ ├── aclue_ancient_medical.yaml │ │ │ │ ├── aclue_ancient_phonetics.yaml │ │ │ │ ├── aclue_basic_ancient_chinese.yaml │ │ │ │ ├── aclue_couplet_prediction.yaml │ │ │ │ ├── aclue_homographic_character_resolution.yaml │ │ │ │ ├── aclue_named_entity_recognition.yaml │ │ │ │ ├── aclue_poetry_appreciate.yaml │ │ │ │ ├── aclue_poetry_context_prediction.yaml │ │ │ │ ├── aclue_poetry_quality_assessment.yaml │ │ │ │ ├── aclue_poetry_sentiment_analysis.yaml │ │ │ │ ├── aclue_polysemy_resolution.yaml │ │ │ │ ├── aclue_reading_comprehension.yaml │ │ │ │ └── aclue_sentence_segmentation.yaml │ │ │ ├── aexams │ │ │ │ ├── README.md │ │ │ │ ├── _aexams.yaml │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── aexams_Biology.yaml │ │ │ │ ├── aexams_IslamicStudies.yaml │ │ │ │ ├── aexams_Physics.yaml │ │ │ │ ├── aexams_Science.yaml │ │ │ │ └── aexams_Social.yaml │ │ │ ├── afrimgsm │ │ │ │ ├── README.md │ │ │ │ ├── direct │ │ │ │ │ ├── afrimgsm_direct_amh.yaml │ │ │ │ │ ├── afrimgsm_direct_eng.yaml │ │ │ │ │ ├── afrimgsm_direct_ewe.yaml │ │ │ │ │ ├── afrimgsm_direct_fra.yaml │ │ │ │ │ ├── afrimgsm_direct_hau.yaml │ │ │ │ │ ├── afrimgsm_direct_ibo.yaml │ │ │ │ │ ├── afrimgsm_direct_kin.yaml │ │ │ │ │ ├── afrimgsm_direct_lin.yaml │ │ │ │ │ ├── afrimgsm_direct_lug.yaml │ │ │ │ │ ├── afrimgsm_direct_orm.yaml │ │ │ │ │ ├── afrimgsm_direct_sna.yaml │ │ │ │ │ ├── afrimgsm_direct_sot.yaml │ │ │ │ │ ├── afrimgsm_direct_swa.yaml │ │ │ │ │ ├── afrimgsm_direct_twi.yaml │ │ │ │ │ ├── afrimgsm_direct_wol.yaml │ │ │ │ │ ├── afrimgsm_direct_xho.yaml │ │ │ │ │ ├── afrimgsm_direct_yor.yaml │ │ │ │ │ ├── afrimgsm_direct_zul.yaml │ │ │ │ │ └── direct_yaml │ │ │ │ ├── en_cot │ │ │ │ │ ├── afrimgsm_en_cot_amh.yaml │ │ │ │ │ ├── afrimgsm_en_cot_eng.yaml │ │ │ │ │ ├── afrimgsm_en_cot_ewe.yaml │ │ │ │ │ ├── afrimgsm_en_cot_fra.yaml │ │ │ │ │ ├── afrimgsm_en_cot_hau.yaml │ │ │ │ │ ├── afrimgsm_en_cot_ibo.yaml │ │ │ │ │ ├── afrimgsm_en_cot_kin.yaml │ │ │ │ │ ├── afrimgsm_en_cot_lin.yaml │ │ │ │ │ ├── afrimgsm_en_cot_lug.yaml │ │ │ │ │ ├── afrimgsm_en_cot_orm.yaml │ │ │ │ │ ├── afrimgsm_en_cot_sna.yaml │ │ │ │ │ ├── afrimgsm_en_cot_sot.yaml │ │ │ │ │ ├── afrimgsm_en_cot_swa.yaml │ │ │ │ │ ├── afrimgsm_en_cot_twi.yaml │ │ │ │ │ ├── afrimgsm_en_cot_wol.yaml │ │ │ │ │ ├── afrimgsm_en_cot_xho.yaml │ │ │ │ │ ├── afrimgsm_en_cot_yor.yaml │ │ │ │ │ ├── afrimgsm_en_cot_zul.yaml │ │ │ │ │ └── cot_yaml │ │ │ │ ├── gen_yaml.sh │ │ │ │ ├── run.sh │ │ │ │ ├── translate │ │ │ │ │ ├── afrimgsm_translate_amh.yaml │ │ │ │ │ ├── afrimgsm_translate_eng.yaml │ │ │ │ │ ├── afrimgsm_translate_ewe.yaml │ │ │ │ │ ├── afrimgsm_translate_fra.yaml │ │ │ │ │ ├── afrimgsm_translate_hau.yaml │ │ │ │ │ ├── afrimgsm_translate_ibo.yaml │ │ │ │ │ ├── afrimgsm_translate_kin.yaml │ │ │ │ │ ├── afrimgsm_translate_lin.yaml │ │ │ │ │ ├── afrimgsm_translate_lug.yaml │ │ │ │ │ ├── afrimgsm_translate_orm.yaml │ │ │ │ │ ├── afrimgsm_translate_sna.yaml │ │ │ │ │ ├── afrimgsm_translate_sot.yaml │ │ │ │ │ ├── afrimgsm_translate_swa.yaml │ │ │ │ │ ├── afrimgsm_translate_twi.yaml │ │ │ │ │ ├── afrimgsm_translate_wol.yaml │ │ │ │ │ ├── afrimgsm_translate_xho.yaml │ │ │ │ │ ├── afrimgsm_translate_yor.yaml │ │ │ │ │ ├── afrimgsm_translate_zul.yaml │ │ │ │ │ └── translate_direct_yaml │ │ │ │ └── utils.py │ │ │ ├── afrimmlu │ │ │ │ ├── README.md │ │ │ │ ├── direct │ │ │ │ │ ├── afrimmlu_common_yaml │ │ │ │ │ ├── afrimmlu_direct_amh.yaml │ │ │ │ │ ├── afrimmlu_direct_eng.yaml │ │ │ │ │ ├── afrimmlu_direct_ewe.yaml │ │ │ │ │ ├── afrimmlu_direct_fra.yaml │ │ │ │ │ ├── afrimmlu_direct_hau.yaml │ │ │ │ │ ├── afrimmlu_direct_ibo.yaml │ │ │ │ │ ├── afrimmlu_direct_kin.yaml │ │ │ │ │ ├── afrimmlu_direct_lin.yaml │ │ │ │ │ ├── afrimmlu_direct_lug.yaml │ │ │ │ │ ├── afrimmlu_direct_orm.yaml │ │ │ │ │ ├── afrimmlu_direct_sna.yaml │ │ │ │ │ ├── afrimmlu_direct_sot.yaml │ │ │ │ │ ├── afrimmlu_direct_swa.yaml │ │ │ │ │ ├── afrimmlu_direct_twi.yaml │ │ │ │ │ ├── afrimmlu_direct_wol.yaml │ │ │ │ │ ├── afrimmlu_direct_xho.yaml │ │ │ │ │ ├── afrimmlu_direct_yor.yaml │ │ │ │ │ ├── afrimmlu_direct_zul.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── fewshot.sh │ │ │ │ ├── translate │ │ │ │ │ ├── afrimmlu_common_translate_yaml │ │ │ │ │ ├── afrimmlu_translate_amh.yaml │ │ │ │ │ ├── afrimmlu_translate_eng.yaml │ │ │ │ │ ├── afrimmlu_translate_ewe.yaml │ │ │ │ │ ├── afrimmlu_translate_fra.yaml │ │ │ │ │ ├── afrimmlu_translate_hau.yaml │ │ │ │ │ ├── afrimmlu_translate_ibo.yaml │ │ │ │ │ ├── afrimmlu_translate_kin.yaml │ │ │ │ │ ├── afrimmlu_translate_lin.yaml │ │ │ │ │ ├── afrimmlu_translate_lug.yaml │ │ │ │ │ ├── afrimmlu_translate_orm.yaml │ │ │ │ │ ├── afrimmlu_translate_sna.yaml │ │ │ │ │ ├── afrimmlu_translate_sot.yaml │ │ │ │ │ ├── afrimmlu_translate_swa.yaml │ │ │ │ │ ├── afrimmlu_translate_twi.yaml │ │ │ │ │ ├── afrimmlu_translate_wol.yaml │ │ │ │ │ ├── afrimmlu_translate_xho.yaml │ │ │ │ │ ├── afrimmlu_translate_yor.yaml │ │ │ │ │ ├── afrimmlu_translate_zul.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── utils.py │ │ │ ├── afrixnli │ │ │ │ ├── README.md │ │ │ │ ├── anli prompt │ │ │ │ │ ├── en-direct │ │ │ │ │ │ ├── afrixnli_en_direct_amh.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_eng.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_ewe.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_fra.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_hau.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_ibo.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_kin.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_lin.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_lug.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_orm.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_sna.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_sot.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_swa.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_twi.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_wol.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_xho.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_yaml │ │ │ │ │ │ ├── afrixnli_en_direct_yor.yaml │ │ │ │ │ │ ├── afrixnli_en_direct_zul.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ ├── native-direct │ │ │ │ │ │ ├── afrixnli_native_direct_amh.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_eng.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_ewe.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_fra.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_hau.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_ibo.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_kin.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_lin.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_lug.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_orm.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_sna.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_sot.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_swa.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_twi.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_wol.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_xho.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_yaml │ │ │ │ │ │ ├── afrixnli_native_direct_yor.yaml │ │ │ │ │ │ ├── afrixnli_native_direct_zul.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ └── translate │ │ │ │ │ │ ├── afrixnli_translate_amh.yaml │ │ │ │ │ │ ├── afrixnli_translate_ewe.yaml │ │ │ │ │ │ ├── afrixnli_translate_fra.yaml │ │ │ │ │ │ ├── afrixnli_translate_hau.yaml │ │ │ │ │ │ ├── afrixnli_translate_ibo.yaml │ │ │ │ │ │ ├── afrixnli_translate_kin.yaml │ │ │ │ │ │ ├── afrixnli_translate_lin.yaml │ │ │ │ │ │ ├── afrixnli_translate_lug.yaml │ │ │ │ │ │ ├── afrixnli_translate_orm.yaml │ │ │ │ │ │ ├── afrixnli_translate_sna.yaml │ │ │ │ │ │ ├── afrixnli_translate_sot.yaml │ │ │ │ │ │ ├── afrixnli_translate_swa.yaml │ │ │ │ │ │ ├── afrixnli_translate_twi.yaml │ │ │ │ │ │ ├── afrixnli_translate_wol.yaml │ │ │ │ │ │ ├── afrixnli_translate_xho.yaml │ │ │ │ │ │ ├── afrixnli_translate_yaml │ │ │ │ │ │ ├── afrixnli_translate_yor.yaml │ │ │ │ │ │ ├── afrixnli_translate_zul.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ ├── lai prompt │ │ │ │ │ ├── direct │ │ │ │ │ │ ├── afrixnli_manual_direct_amh.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_eng.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_ewe.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_fra.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_hau.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_ibo.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_kin.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_lin.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_lug.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_orm.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_sna.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_sot.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_swa.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_twi.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_wol.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_xho.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_yor.yaml │ │ │ │ │ │ ├── afrixnli_manual_direct_zul.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ └── translate │ │ │ │ │ │ ├── afrixnli_manual_translate_amh.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_ewe.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_fra.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_hau.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_ibo.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_kin.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_lin.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_lug.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_orm.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_sna.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_sot.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_swa.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_twi.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_wol.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_xho.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_yor.yaml │ │ │ │ │ │ ├── afrixnli_manual_translate_zul.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ └── utils.py │ │ │ ├── agieval │ │ │ │ ├── README.md │ │ │ │ ├── agieval.yaml │ │ │ │ ├── agieval_cn.yaml │ │ │ │ ├── agieval_en.yaml │ │ │ │ ├── agieval_nous.yaml │ │ │ │ ├── aqua-rat.yaml │ │ │ │ ├── gaokao-biology.yaml │ │ │ │ ├── gaokao-chemistry.yaml │ │ │ │ ├── gaokao-chinese.yaml │ │ │ │ ├── gaokao-english.yaml │ │ │ │ ├── gaokao-geography.yaml │ │ │ │ ├── gaokao-history.yaml │ │ │ │ ├── gaokao-mathcloze.yaml │ │ │ │ ├── gaokao-mathqa.yaml │ │ │ │ ├── gaokao-physics.yaml │ │ │ │ ├── jec-qa-ca.yaml │ │ │ │ ├── jec-qa-kd.yaml │ │ │ │ ├── logiqa-en.yaml │ │ │ │ ├── logiqa-zh.yaml │ │ │ │ ├── lsat-ar.yaml │ │ │ │ ├── lsat-lr.yaml │ │ │ │ ├── lsat-rc.yaml │ │ │ │ ├── math.yaml │ │ │ │ ├── sat-en-without-passage.yaml │ │ │ │ ├── sat-en.yaml │ │ │ │ ├── sat-math.yaml │ │ │ │ └── utils.py │ │ │ ├── aime │ │ │ │ ├── README.md │ │ │ │ ├── aime24_figures.yaml │ │ │ │ ├── aime24_figures_agg64.yaml │ │ │ │ ├── aime24_nofigures.yaml │ │ │ │ ├── aime24_nofigures_agg64.yaml │ │ │ │ ├── aime_2024_agg8.yaml │ │ │ │ ├── aime_2024_rebase.yaml │ │ │ │ ├── aime_figures.yaml │ │ │ │ ├── aime_nofigures.yaml │ │ │ │ └── utils.py │ │ │ ├── alghafa │ │ │ │ ├── copa_ar │ │ │ │ │ ├── README.md │ │ │ │ │ └── copa_ar.yaml │ │ │ │ └── piqa_ar │ │ │ │ │ ├── README.md │ │ │ │ │ └── piqa_ar.yaml │ │ │ ├── anli │ │ │ │ ├── README.md │ │ │ │ ├── anli_r1.yaml │ │ │ │ ├── anli_r2.yaml │ │ │ │ └── anli_r3.yaml │ │ │ ├── arabic_leaderboard_complete │ │ │ │ ├── README.md │ │ │ │ ├── arabic_leaderboard_alghafa │ │ │ │ │ ├── arabic_leaderboard_alghafa.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_mcq_exams_test_ar.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_meta_ar_dialects.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_meta_ar_msa.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_sentiment_task.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_exams │ │ │ │ │ ├── arabic_exams.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_exams.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mmlu │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_abstract_algebra.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_anatomy.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_astronomy.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_business_ethics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_biology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_chemistry.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_computer_science.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_mathematics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_medicine.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_physics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_computer_security.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_conceptual_physics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_econometrics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_electrical_engineering.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_formal_logic.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_global_facts.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_biology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_european_history.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_geography.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_physics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_psychology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_statistics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_us_history.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_world_history.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_human_aging.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_human_sexuality.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_international_law.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_jurisprudence.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_logical_fallacies.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_machine_learning.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_management.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_marketing.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_medical_genetics.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_miscellaneous.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_moral_disputes.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_moral_scenarios.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_nutrition.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_philosophy.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_prehistory.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_accounting.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_law.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_medicine.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_psychology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_public_relations.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_security_studies.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_sociology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_virology.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_world_religions.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_challenge │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_challenge.yaml │ │ │ │ │ ├── arabic_mt_arc_challenge.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_easy │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_easy.yaml │ │ │ │ │ ├── arabic_mt_arc_easy.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_boolq │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_boolq.yaml │ │ │ │ │ ├── arabic_mt_boolq.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_copa │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_copa.yaml │ │ │ │ │ ├── arabic_mt_copa.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_hellaswag │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_hellaswag.yaml │ │ │ │ │ ├── arabic_mt_hellaswag.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_mmlu │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_mmlu.yaml │ │ │ │ │ ├── arabic_mt_mmlu.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_openbook_qa │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_openbook_qa.yaml │ │ │ │ │ ├── arabic_mt_openbook_qa.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_piqa │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_piqa.yaml │ │ │ │ │ ├── arabic_mt_piqa.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_race │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_race.yaml │ │ │ │ │ ├── arabic_mt_race.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_sciq │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_sciq.yaml │ │ │ │ │ ├── arabic_mt_sciq.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_toxigen │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_toxigen.yaml │ │ │ │ │ ├── arabic_mt_toxigen.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_avca │ │ │ │ │ ├── arabic_leaderboard_acva.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Algeria.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Ancient_Egypt.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arab_Empire.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Architecture.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Art.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Astronomy.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Calligraphy.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Ceremony.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Clothing.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Culture.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Food.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Funeral.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Geography.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_History.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Language_Origin.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Literature.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Math.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Medicine.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Music.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Ornament.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Philosophy.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Physics_and_Chemistry.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Wedding.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Bahrain.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Comoros.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Egypt_modern.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromAncientEgypt.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromByzantium.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromChina.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromGreece.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromIslam.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromPersia.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromRome.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Iraq.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islam_Education.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islam_branches_and_schools.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islamic_law_system.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Jordan.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Kuwait.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Lebanon.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Libya.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Mauritania.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Mesopotamia_civilization.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Morocco.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Oman.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Palestine.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Qatar.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Saudi_Arabia.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Somalia.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Sudan.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Syria.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Tunisia.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_United_Arab_Emirates.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Yemen.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_communication.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_computer_and_phone.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_daily_life.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_entertainment.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── arabic_leaderboard_complete.yaml │ │ │ ├── arabic_leaderboard_light │ │ │ │ ├── README.md │ │ │ │ ├── arabic_leaderboard_alghafa_light │ │ │ │ │ ├── arabic_leaderboard_alghafa_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_mcq_exams_test_ar_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_meta_ar_dialects_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_meta_ar_msa_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light.yaml │ │ │ │ │ ├── arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_exams_light │ │ │ │ │ ├── arabic_exams_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_exams_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_abstract_algebra_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_anatomy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_astronomy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_business_ethics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_clinical_knowledge_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_biology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_chemistry_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_computer_science_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_mathematics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_medicine_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_college_physics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_computer_security_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_conceptual_physics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_econometrics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_electrical_engineering_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_elementary_mathematics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_formal_logic_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_global_facts_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_biology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_chemistry_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_computer_science_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_european_history_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_geography_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_mathematics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_physics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_psychology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_statistics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_us_history_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_high_school_world_history_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_human_aging_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_human_sexuality_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_international_law_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_jurisprudence_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_logical_fallacies_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_machine_learning_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_management_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_marketing_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_medical_genetics_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_miscellaneous_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_moral_disputes_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_moral_scenarios_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_nutrition_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_philosophy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_prehistory_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_accounting_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_law_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_medicine_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_professional_psychology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_public_relations_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_security_studies_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_sociology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_us_foreign_policy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_virology_light.yaml │ │ │ │ │ ├── arabic_leaderboard_arabic_mmlu_world_religions_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_challenge_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_challenge_light.yaml │ │ │ │ │ ├── arabic_mt_arc_challenge_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_easy_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_arc_easy_light.yaml │ │ │ │ │ ├── arabic_mt_arc_easy_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_boolq_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_boolq_light.yaml │ │ │ │ │ ├── arabic_mt_boolq_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_copa_light │ │ │ │ │ ├── arabic_mt_copa_light.yaml │ │ │ │ │ ├── arbic_leaderboard_arabic_mt_copa_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_hellaswag_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_hellaswag_light.yaml │ │ │ │ │ ├── arabic_mt_hellaswag_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_mmlu_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_mmlu_light.yaml │ │ │ │ │ ├── arabic_mt_mmlu_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_openbook_qa_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_openbook_qa_light.yaml │ │ │ │ │ ├── arabic_mt_openbook_qa_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_piqa_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_piqa_light.yaml │ │ │ │ │ ├── arabic_mt_piqa_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_race_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_race_light.yaml │ │ │ │ │ ├── arabic_mt_race_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_sciq_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_sciq_light.yaml │ │ │ │ │ ├── arabic_mt_sciq_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_arabic_mt_toxigen_light │ │ │ │ │ ├── arabic_leaderboard_arabic_mt_toxigen_light.yaml │ │ │ │ │ ├── arabic_mt_toxigen_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── arabic_leaderboard_avca_light │ │ │ │ │ ├── arabic_leaderboard_acva_Algeria_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Ancient_Egypt_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arab_Empire_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Architecture_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Art_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Astronomy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Calligraphy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Ceremony_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Clothing_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Culture_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Food_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Funeral_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Geography_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_History_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Language_Origin_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Literature_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Math_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Medicine_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Music_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Ornament_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Philosophy_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Arabic_Wedding_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Bahrain_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Comoros_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Egypt_modern_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromAncientEgypt_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromByzantium_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromChina_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromGreece_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromIslam_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromPersia_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_InfluenceFromRome_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Iraq_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islam_Education_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islam_branches_and_schools_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Islamic_law_system_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Jordan_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Kuwait_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Lebanon_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Libya_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Mauritania_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Mesopotamia_civilization_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Morocco_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Oman_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Palestine_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Qatar_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Saudi_Arabia_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Somalia_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Sudan_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Syria_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Tunisia_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_United_Arab_Emirates_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_Yemen_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_communication_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_computer_and_phone_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_daily_life_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_entertainment_light.yaml │ │ │ │ │ ├── arabic_leaderboard_acva_light.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── arabic_leaderboard_light.yaml │ │ │ ├── arabicmmlu │ │ │ │ ├── README.md │ │ │ │ ├── _arabicmmlu.yaml │ │ │ │ ├── _arabicmmlu_humanities.yaml │ │ │ │ ├── _arabicmmlu_language.yaml │ │ │ │ ├── _arabicmmlu_other.yaml │ │ │ │ ├── _arabicmmlu_social_science.yaml │ │ │ │ ├── _arabicmmlu_stem.yaml │ │ │ │ ├── _default_arabicmmlu_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── arabicmmlu_arabic_language_general.yaml │ │ │ │ ├── arabicmmlu_arabic_language_grammar.yaml │ │ │ │ ├── arabicmmlu_driving_test.yaml │ │ │ │ ├── arabicmmlu_general_knowledge.yaml │ │ │ │ ├── arabicmmlu_high_arabic_language.yaml │ │ │ │ ├── arabicmmlu_high_biology.yaml │ │ │ │ ├── arabicmmlu_high_civics.yaml │ │ │ │ ├── arabicmmlu_high_computer_science.yaml │ │ │ │ ├── arabicmmlu_high_economics.yaml │ │ │ │ ├── arabicmmlu_high_geography.yaml │ │ │ │ ├── arabicmmlu_high_history.yaml │ │ │ │ ├── arabicmmlu_high_islamic_studies.yaml │ │ │ │ ├── arabicmmlu_high_philosophy.yaml │ │ │ │ ├── arabicmmlu_high_physics.yaml │ │ │ │ ├── arabicmmlu_islamic_studies.yaml │ │ │ │ ├── arabicmmlu_middle_arabic_language.yaml │ │ │ │ ├── arabicmmlu_middle_civics.yaml │ │ │ │ ├── arabicmmlu_middle_computer_science.yaml │ │ │ │ ├── arabicmmlu_middle_economics.yaml │ │ │ │ ├── arabicmmlu_middle_general_knowledge.yaml │ │ │ │ ├── arabicmmlu_middle_geography.yaml │ │ │ │ ├── arabicmmlu_middle_history.yaml │ │ │ │ ├── arabicmmlu_middle_islamic_studies.yaml │ │ │ │ ├── arabicmmlu_middle_natural_science.yaml │ │ │ │ ├── arabicmmlu_middle_social_science.yaml │ │ │ │ ├── arabicmmlu_primary_arabic_language.yaml │ │ │ │ ├── arabicmmlu_primary_computer_science.yaml │ │ │ │ ├── arabicmmlu_primary_general_knowledge.yaml │ │ │ │ ├── arabicmmlu_primary_geography.yaml │ │ │ │ ├── arabicmmlu_primary_history.yaml │ │ │ │ ├── arabicmmlu_primary_islamic_studies.yaml │ │ │ │ ├── arabicmmlu_primary_math.yaml │ │ │ │ ├── arabicmmlu_primary_natural_science.yaml │ │ │ │ ├── arabicmmlu_primary_social_science.yaml │ │ │ │ ├── arabicmmlu_prof_law.yaml │ │ │ │ ├── arabicmmlu_univ_accounting.yaml │ │ │ │ ├── arabicmmlu_univ_computer_science.yaml │ │ │ │ ├── arabicmmlu_univ_economics.yaml │ │ │ │ ├── arabicmmlu_univ_management.yaml │ │ │ │ ├── arabicmmlu_univ_political_science.yaml │ │ │ │ └── utils.py │ │ │ ├── arc │ │ │ │ ├── README.md │ │ │ │ ├── arc_challenge.yaml │ │ │ │ └── arc_easy.yaml │ │ │ ├── arc_mt │ │ │ │ ├── README.md │ │ │ │ ├── arc_challenge_mt_da.yaml │ │ │ │ ├── arc_challenge_mt_de.yaml │ │ │ │ ├── arc_challenge_mt_el.yaml │ │ │ │ ├── arc_challenge_mt_es.yaml │ │ │ │ ├── arc_challenge_mt_fi.yaml │ │ │ │ ├── arc_challenge_mt_hu.yaml │ │ │ │ ├── arc_challenge_mt_is.yaml │ │ │ │ ├── arc_challenge_mt_it.yaml │ │ │ │ ├── arc_challenge_mt_nb.yaml │ │ │ │ ├── arc_challenge_mt_pl.yaml │ │ │ │ ├── arc_challenge_mt_pt.yaml │ │ │ │ └── arc_challenge_mt_sv.yaml │ │ │ ├── arithmetic │ │ │ │ ├── README.md │ │ │ │ ├── arithmetic_1dc.yaml │ │ │ │ ├── arithmetic_2da.yaml │ │ │ │ ├── arithmetic_2dm.yaml │ │ │ │ ├── arithmetic_2ds.yaml │ │ │ │ ├── arithmetic_3da.yaml │ │ │ │ ├── arithmetic_3ds.yaml │ │ │ │ ├── arithmetic_4da.yaml │ │ │ │ ├── arithmetic_4ds.yaml │ │ │ │ ├── arithmetic_5da.yaml │ │ │ │ └── arithmetic_5ds.yaml │ │ │ ├── asdiv │ │ │ │ ├── README.md │ │ │ │ ├── asdiv-cot-llama.yaml │ │ │ │ └── default.yaml │ │ │ ├── babi │ │ │ │ ├── README.md │ │ │ │ └── babi.yaml │ │ │ ├── basque_bench │ │ │ │ ├── README.md │ │ │ │ ├── basque_bench.yaml │ │ │ │ ├── flores_eu │ │ │ │ │ ├── _flores_common_yaml │ │ │ │ │ ├── create_yamls_flores_eu.py │ │ │ │ │ ├── flores_ca-eu.yaml │ │ │ │ │ ├── flores_de-eu.yaml │ │ │ │ │ ├── flores_en-eu.yaml │ │ │ │ │ ├── flores_es-eu.yaml │ │ │ │ │ ├── flores_eu-ca.yaml │ │ │ │ │ ├── flores_eu-de.yaml │ │ │ │ │ ├── flores_eu-en.yaml │ │ │ │ │ ├── flores_eu-es.yaml │ │ │ │ │ ├── flores_eu-fr.yaml │ │ │ │ │ ├── flores_eu-gl.yaml │ │ │ │ │ ├── flores_eu-it.yaml │ │ │ │ │ ├── flores_eu-pt.yaml │ │ │ │ │ ├── flores_eu.yaml │ │ │ │ │ ├── flores_fr-eu.yaml │ │ │ │ │ ├── flores_gl-eu.yaml │ │ │ │ │ ├── flores_it-eu.yaml │ │ │ │ │ └── flores_pt-eu.yaml │ │ │ │ ├── mgsm_cot_native_eu.yaml │ │ │ │ ├── mgsm_direct_eu.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── wnli_eu.yaml │ │ │ │ └── xcopa_eu.yaml │ │ │ ├── basqueglue │ │ │ │ ├── README.md │ │ │ │ ├── bec.yaml │ │ │ │ ├── bhtc.yaml │ │ │ │ ├── coref.yaml │ │ │ │ ├── qnli.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── vaxx.yaml │ │ │ │ └── wic.yaml │ │ │ ├── bbh │ │ │ │ ├── README.md │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── cot_fewshot │ │ │ │ │ ├── _bbh.yaml │ │ │ │ │ ├── _bbh_cot_fewshot.yaml │ │ │ │ │ ├── _cot_fewshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ ├── cot_zeroshot │ │ │ │ │ ├── _bbh_cot_zeroshot.yaml │ │ │ │ │ ├── _cot_zeroshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ ├── fewshot │ │ │ │ │ ├── _bbh_fewshot.yaml │ │ │ │ │ ├── _fewshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ │ └── zeroshot │ │ │ │ │ ├── _bbh_zeroshot.yaml │ │ │ │ │ ├── _zeroshot_template_yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multistep_arithmetic_two.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── web_of_lies.yaml │ │ │ │ │ └── word_sorting.yaml │ │ │ ├── belebele │ │ │ │ ├── README.md │ │ │ │ ├── _belebele.yaml │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── belebele_acm_Arab.yaml │ │ │ │ ├── belebele_afr_Latn.yaml │ │ │ │ ├── belebele_als_Latn.yaml │ │ │ │ ├── belebele_amh_Ethi.yaml │ │ │ │ ├── belebele_apc_Arab.yaml │ │ │ │ ├── belebele_arb_Arab.yaml │ │ │ │ ├── belebele_arb_Latn.yaml │ │ │ │ ├── belebele_ars_Arab.yaml │ │ │ │ ├── belebele_ary_Arab.yaml │ │ │ │ ├── belebele_arz_Arab.yaml │ │ │ │ ├── belebele_asm_Beng.yaml │ │ │ │ ├── belebele_azj_Latn.yaml │ │ │ │ ├── belebele_bam_Latn.yaml │ │ │ │ ├── belebele_ben_Beng.yaml │ │ │ │ ├── belebele_ben_Latn.yaml │ │ │ │ ├── belebele_bod_Tibt.yaml │ │ │ │ ├── belebele_bul_Cyrl.yaml │ │ │ │ ├── belebele_cat_Latn.yaml │ │ │ │ ├── belebele_ceb_Latn.yaml │ │ │ │ ├── belebele_ces_Latn.yaml │ │ │ │ ├── belebele_ckb_Arab.yaml │ │ │ │ ├── belebele_dan_Latn.yaml │ │ │ │ ├── belebele_deu_Latn.yaml │ │ │ │ ├── belebele_ell_Grek.yaml │ │ │ │ ├── belebele_eng_Latn.yaml │ │ │ │ ├── belebele_est_Latn.yaml │ │ │ │ ├── belebele_eus_Latn.yaml │ │ │ │ ├── belebele_fin_Latn.yaml │ │ │ │ ├── belebele_fra_Latn.yaml │ │ │ │ ├── belebele_fuv_Latn.yaml │ │ │ │ ├── belebele_gaz_Latn.yaml │ │ │ │ ├── belebele_grn_Latn.yaml │ │ │ │ ├── belebele_guj_Gujr.yaml │ │ │ │ ├── belebele_hat_Latn.yaml │ │ │ │ ├── belebele_hau_Latn.yaml │ │ │ │ ├── belebele_heb_Hebr.yaml │ │ │ │ ├── belebele_hin_Deva.yaml │ │ │ │ ├── belebele_hin_Latn.yaml │ │ │ │ ├── belebele_hrv_Latn.yaml │ │ │ │ ├── belebele_hun_Latn.yaml │ │ │ │ ├── belebele_hye_Armn.yaml │ │ │ │ ├── belebele_ibo_Latn.yaml │ │ │ │ ├── belebele_ilo_Latn.yaml │ │ │ │ ├── belebele_ind_Latn.yaml │ │ │ │ ├── belebele_isl_Latn.yaml │ │ │ │ ├── belebele_ita_Latn.yaml │ │ │ │ ├── belebele_jav_Latn.yaml │ │ │ │ ├── belebele_jpn_Jpan.yaml │ │ │ │ ├── belebele_kac_Latn.yaml │ │ │ │ ├── belebele_kan_Knda.yaml │ │ │ │ ├── belebele_kat_Geor.yaml │ │ │ │ ├── belebele_kaz_Cyrl.yaml │ │ │ │ ├── belebele_kea_Latn.yaml │ │ │ │ ├── belebele_khk_Cyrl.yaml │ │ │ │ ├── belebele_khm_Khmr.yaml │ │ │ │ ├── belebele_kin_Latn.yaml │ │ │ │ ├── belebele_kir_Cyrl.yaml │ │ │ │ ├── belebele_kor_Hang.yaml │ │ │ │ ├── belebele_lao_Laoo.yaml │ │ │ │ ├── belebele_lin_Latn.yaml │ │ │ │ ├── belebele_lit_Latn.yaml │ │ │ │ ├── belebele_lug_Latn.yaml │ │ │ │ ├── belebele_luo_Latn.yaml │ │ │ │ ├── belebele_lvs_Latn.yaml │ │ │ │ ├── belebele_mal_Mlym.yaml │ │ │ │ ├── belebele_mar_Deva.yaml │ │ │ │ ├── belebele_mkd_Cyrl.yaml │ │ │ │ ├── belebele_mlt_Latn.yaml │ │ │ │ ├── belebele_mri_Latn.yaml │ │ │ │ ├── belebele_mya_Mymr.yaml │ │ │ │ ├── belebele_nld_Latn.yaml │ │ │ │ ├── belebele_nob_Latn.yaml │ │ │ │ ├── belebele_npi_Deva.yaml │ │ │ │ ├── belebele_npi_Latn.yaml │ │ │ │ ├── belebele_nso_Latn.yaml │ │ │ │ ├── belebele_nya_Latn.yaml │ │ │ │ ├── belebele_ory_Orya.yaml │ │ │ │ ├── belebele_pan_Guru.yaml │ │ │ │ ├── belebele_pbt_Arab.yaml │ │ │ │ ├── belebele_pes_Arab.yaml │ │ │ │ ├── belebele_plt_Latn.yaml │ │ │ │ ├── belebele_pol_Latn.yaml │ │ │ │ ├── belebele_por_Latn.yaml │ │ │ │ ├── belebele_ron_Latn.yaml │ │ │ │ ├── belebele_rus_Cyrl.yaml │ │ │ │ ├── belebele_shn_Mymr.yaml │ │ │ │ ├── belebele_sin_Latn.yaml │ │ │ │ ├── belebele_sin_Sinh.yaml │ │ │ │ ├── belebele_slk_Latn.yaml │ │ │ │ ├── belebele_slv_Latn.yaml │ │ │ │ ├── belebele_sna_Latn.yaml │ │ │ │ ├── belebele_snd_Arab.yaml │ │ │ │ ├── belebele_som_Latn.yaml │ │ │ │ ├── belebele_sot_Latn.yaml │ │ │ │ ├── belebele_spa_Latn.yaml │ │ │ │ ├── belebele_srp_Cyrl.yaml │ │ │ │ ├── belebele_ssw_Latn.yaml │ │ │ │ ├── belebele_sun_Latn.yaml │ │ │ │ ├── belebele_swe_Latn.yaml │ │ │ │ ├── belebele_swh_Latn.yaml │ │ │ │ ├── belebele_tam_Taml.yaml │ │ │ │ ├── belebele_tel_Telu.yaml │ │ │ │ ├── belebele_tgk_Cyrl.yaml │ │ │ │ ├── belebele_tgl_Latn.yaml │ │ │ │ ├── belebele_tha_Thai.yaml │ │ │ │ ├── belebele_tir_Ethi.yaml │ │ │ │ ├── belebele_tsn_Latn.yaml │ │ │ │ ├── belebele_tso_Latn.yaml │ │ │ │ ├── belebele_tur_Latn.yaml │ │ │ │ ├── belebele_ukr_Cyrl.yaml │ │ │ │ ├── belebele_urd_Arab.yaml │ │ │ │ ├── belebele_urd_Latn.yaml │ │ │ │ ├── belebele_uzn_Latn.yaml │ │ │ │ ├── belebele_vie_Latn.yaml │ │ │ │ ├── belebele_war_Latn.yaml │ │ │ │ ├── belebele_wol_Latn.yaml │ │ │ │ ├── belebele_xho_Latn.yaml │ │ │ │ ├── belebele_yor_Latn.yaml │ │ │ │ ├── belebele_zho_Hans.yaml │ │ │ │ ├── belebele_zho_Hant.yaml │ │ │ │ ├── belebele_zsm_Latn.yaml │ │ │ │ └── belebele_zul_Latn.yaml │ │ │ ├── benchmarks │ │ │ │ ├── flan │ │ │ │ │ ├── _held_in_template_yaml │ │ │ │ │ ├── flan_held_in.yaml │ │ │ │ │ └── flan_held_out.yaml │ │ │ │ ├── minerva_math.yaml │ │ │ │ ├── multimedqa │ │ │ │ │ ├── README.md │ │ │ │ │ └── multimedqa.yaml │ │ │ │ ├── openllm.yaml │ │ │ │ ├── pythia.yaml │ │ │ │ └── t0_eval.yaml │ │ │ ├── bertaqa │ │ │ │ ├── README.md │ │ │ │ ├── _bertaqa_template │ │ │ │ ├── bertaqa_en.yaml │ │ │ │ ├── bertaqa_en_mt_gemma-7b.yaml │ │ │ │ ├── bertaqa_en_mt_hitz.yaml │ │ │ │ ├── bertaqa_en_mt_itzuli.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-13b-v1.1.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-13b-v1.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-70b-v1.1.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-70b-v1.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-7b-v1.1.yaml │ │ │ │ ├── bertaqa_en_mt_latxa-7b-v1.yaml │ │ │ │ ├── bertaqa_en_mt_llama-2-13b.yaml │ │ │ │ ├── bertaqa_en_mt_llama-2-70b.yaml │ │ │ │ ├── bertaqa_en_mt_llama-2-7b.yaml │ │ │ │ ├── bertaqa_en_mt_madlad.yaml │ │ │ │ ├── bertaqa_en_mt_nllb.yaml │ │ │ │ └── bertaqa_eu.yaml │ │ │ ├── bigbench │ │ │ │ ├── README.md │ │ │ │ ├── generate_tasks.py │ │ │ │ ├── generate_until │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ ├── ascii_word_recognition.yaml │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ ├── auto_categorization.yaml │ │ │ │ │ ├── auto_debugging.yaml │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ ├── bridging_anaphora_resolution_barqa.yaml │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ ├── chess_state_tracking.yaml │ │ │ │ │ ├── chinese_remainder_theorem.yaml │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ ├── codenames.yaml │ │ │ │ │ ├── color.yaml │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ ├── conlang_translation.yaml │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ ├── cryptonite.yaml │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ ├── disfl_qa.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ ├── few_shot_nlg.yaml │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ ├── gem.yaml │ │ │ │ │ ├── gender_inclusive_sentences_german.yaml │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ ├── hindi_question_answering.yaml │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ ├── international_phonetic_alphabet_transliterate.yaml │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ ├── language_games.yaml │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ ├── linguistic_mappings.yaml │ │ │ │ │ ├── linguistics_puzzles.yaml │ │ │ │ │ ├── list_functions.yaml │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ ├── matrixshapes.yaml │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ ├── minute_mysteries_qa.yaml │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ ├── modified_arithmetic.yaml │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── mult_data_wrangling.yaml │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ ├── natural_instructions.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ ├── operators.yaml │ │ │ │ │ ├── paragraph_segmentation.yaml │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ ├── parsinlu_reading_comprehension.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ ├── physics.yaml │ │ │ │ │ ├── physics_questions.yaml │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ ├── polish_sequence_labeling.yaml │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ ├── qa_wikidata.yaml │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── repeat_copy_logic.yaml │ │ │ │ │ ├── rephrase.yaml │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── scientific_press_release.yaml │ │ │ │ │ ├── semantic_parsing_in_context_sparc.yaml │ │ │ │ │ ├── semantic_parsing_spider.yaml │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ ├── simp_turing_concept.yaml │ │ │ │ │ ├── simple_arithmetic_json.yaml │ │ │ │ │ ├── simple_arithmetic_json_multiple_choice.yaml │ │ │ │ │ ├── simple_arithmetic_json_subtasks.yaml │ │ │ │ │ ├── simple_arithmetic_multiple_targets_json.yaml │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ ├── simple_text_editing.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ ├── sufficient_information.yaml │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tense.yaml │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ ├── topical_chat.yaml │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ ├── unnatural_in_context_learning.yaml │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ ├── winowhy.yaml │ │ │ │ │ ├── word_sorting.yaml │ │ │ │ │ └── word_unscrambling.yaml │ │ │ │ ├── generate_until_template_yaml │ │ │ │ ├── multiple_choice │ │ │ │ │ ├── abstract_narrative_understanding.yaml │ │ │ │ │ ├── anachronisms.yaml │ │ │ │ │ ├── analogical_similarity.yaml │ │ │ │ │ ├── analytic_entailment.yaml │ │ │ │ │ ├── arithmetic.yaml │ │ │ │ │ ├── authorship_verification.yaml │ │ │ │ │ ├── bbq_lite_json.yaml │ │ │ │ │ ├── causal_judgment.yaml │ │ │ │ │ ├── cause_and_effect.yaml │ │ │ │ │ ├── checkmate_in_one.yaml │ │ │ │ │ ├── cifar10_classification.yaml │ │ │ │ │ ├── code_line_description.yaml │ │ │ │ │ ├── color.yaml │ │ │ │ │ ├── common_morpheme.yaml │ │ │ │ │ ├── conceptual_combinations.yaml │ │ │ │ │ ├── contextual_parametric_knowledge_conflicts.yaml │ │ │ │ │ ├── crash_blossom.yaml │ │ │ │ │ ├── crass_ai.yaml │ │ │ │ │ ├── cryobiology_spanish.yaml │ │ │ │ │ ├── cs_algorithms.yaml │ │ │ │ │ ├── dark_humor_detection.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── discourse_marker_prediction.yaml │ │ │ │ │ ├── dyck_languages.yaml │ │ │ │ │ ├── elementary_math_qa.yaml │ │ │ │ │ ├── emoji_movie.yaml │ │ │ │ │ ├── emojis_emotion_prediction.yaml │ │ │ │ │ ├── empirical_judgments.yaml │ │ │ │ │ ├── english_proverbs.yaml │ │ │ │ │ ├── english_russian_proverbs.yaml │ │ │ │ │ ├── entailed_polarity.yaml │ │ │ │ │ ├── entailed_polarity_hindi.yaml │ │ │ │ │ ├── epistemic_reasoning.yaml │ │ │ │ │ ├── evaluating_information_essentiality.yaml │ │ │ │ │ ├── fact_checker.yaml │ │ │ │ │ ├── fantasy_reasoning.yaml │ │ │ │ │ ├── figure_of_speech_detection.yaml │ │ │ │ │ ├── formal_fallacies_syllogisms_negation.yaml │ │ │ │ │ ├── general_knowledge.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── goal_step_wikihow.yaml │ │ │ │ │ ├── gre_reading_comprehension.yaml │ │ │ │ │ ├── hhh_alignment.yaml │ │ │ │ │ ├── hindu_knowledge.yaml │ │ │ │ │ ├── hinglish_toxicity.yaml │ │ │ │ │ ├── human_organs_senses.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── identify_math_theorems.yaml │ │ │ │ │ ├── identify_odd_metaphor.yaml │ │ │ │ │ ├── implicatures.yaml │ │ │ │ │ ├── implicit_relations.yaml │ │ │ │ │ ├── intent_recognition.yaml │ │ │ │ │ ├── international_phonetic_alphabet_nli.yaml │ │ │ │ │ ├── intersect_geometry.yaml │ │ │ │ │ ├── irony_identification.yaml │ │ │ │ │ ├── kanji_ascii.yaml │ │ │ │ │ ├── kannada.yaml │ │ │ │ │ ├── key_value_maps.yaml │ │ │ │ │ ├── known_unknowns.yaml │ │ │ │ │ ├── language_identification.yaml │ │ │ │ │ ├── logic_grid_puzzle.yaml │ │ │ │ │ ├── logical_args.yaml │ │ │ │ │ ├── logical_deduction.yaml │ │ │ │ │ ├── logical_fallacy_detection.yaml │ │ │ │ │ ├── logical_sequence.yaml │ │ │ │ │ ├── mathematical_induction.yaml │ │ │ │ │ ├── metaphor_boolean.yaml │ │ │ │ │ ├── metaphor_understanding.yaml │ │ │ │ │ ├── misconceptions.yaml │ │ │ │ │ ├── misconceptions_russian.yaml │ │ │ │ │ ├── mnist_ascii.yaml │ │ │ │ │ ├── moral_permissibility.yaml │ │ │ │ │ ├── movie_dialog_same_or_different.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── multiemo.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── nonsense_words_grammar.yaml │ │ │ │ │ ├── novel_concepts.yaml │ │ │ │ │ ├── odd_one_out.yaml │ │ │ │ │ ├── parsinlu_qa.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── periodic_elements.yaml │ │ │ │ │ ├── persian_idioms.yaml │ │ │ │ │ ├── phrase_relatedness.yaml │ │ │ │ │ ├── physical_intuition.yaml │ │ │ │ │ ├── physics.yaml │ │ │ │ │ ├── play_dialog_same_or_different.yaml │ │ │ │ │ ├── presuppositions_as_nli.yaml │ │ │ │ │ ├── question_selection.yaml │ │ │ │ │ ├── real_or_fake_text.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── riddle_sense.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── sentence_ambiguity.yaml │ │ │ │ │ ├── similarities_abstraction.yaml │ │ │ │ │ ├── simple_ethical_questions.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── social_iqa.yaml │ │ │ │ │ ├── social_support.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── strange_stories.yaml │ │ │ │ │ ├── strategyqa.yaml │ │ │ │ │ ├── suicide_risk.yaml │ │ │ │ │ ├── swahili_english_proverbs.yaml │ │ │ │ │ ├── swedish_to_german_proverbs.yaml │ │ │ │ │ ├── symbol_interpretation.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── timedial.yaml │ │ │ │ │ ├── tracking_shuffled_objects.yaml │ │ │ │ │ ├── understanding_fables.yaml │ │ │ │ │ ├── undo_permutation.yaml │ │ │ │ │ ├── unit_conversion.yaml │ │ │ │ │ ├── unit_interpretation.yaml │ │ │ │ │ ├── vitaminc_fact_verification.yaml │ │ │ │ │ ├── what_is_the_tao.yaml │ │ │ │ │ ├── which_wiki_edit.yaml │ │ │ │ │ └── winowhy.yaml │ │ │ │ ├── multiple_choice_template_a_yaml │ │ │ │ ├── multiple_choice_template_b_yaml │ │ │ │ └── push_bigbench_dataset.py │ │ │ ├── blimp │ │ │ │ ├── README.md │ │ │ │ ├── _blimp.yaml │ │ │ │ ├── _template_yaml │ │ │ │ ├── adjunct_island.yaml │ │ │ │ ├── anaphor_gender_agreement.yaml │ │ │ │ ├── anaphor_number_agreement.yaml │ │ │ │ ├── animate_subject_passive.yaml │ │ │ │ ├── animate_subject_trans.yaml │ │ │ │ ├── causative.yaml │ │ │ │ ├── complex_NP_island.yaml │ │ │ │ ├── coordinate_structure_constraint_complex_left_branch.yaml │ │ │ │ ├── coordinate_structure_constraint_object_extraction.yaml │ │ │ │ ├── determiner_noun_agreement_1.yaml │ │ │ │ ├── determiner_noun_agreement_2.yaml │ │ │ │ ├── determiner_noun_agreement_irregular_1.yaml │ │ │ │ ├── determiner_noun_agreement_irregular_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_1.yaml │ │ │ │ ├── determiner_noun_agreement_with_adj_irregular_2.yaml │ │ │ │ ├── determiner_noun_agreement_with_adjective_1.yaml │ │ │ │ ├── distractor_agreement_relational_noun.yaml │ │ │ │ ├── distractor_agreement_relative_clause.yaml │ │ │ │ ├── drop_argument.yaml │ │ │ │ ├── ellipsis_n_bar_1.yaml │ │ │ │ ├── ellipsis_n_bar_2.yaml │ │ │ │ ├── existential_there_object_raising.yaml │ │ │ │ ├── existential_there_quantifiers_1.yaml │ │ │ │ ├── existential_there_quantifiers_2.yaml │ │ │ │ ├── existential_there_subject_raising.yaml │ │ │ │ ├── expletive_it_object_raising.yaml │ │ │ │ ├── generate_configs.py │ │ │ │ ├── inchoative.yaml │ │ │ │ ├── intransitive.yaml │ │ │ │ ├── irregular_past_participle_adjectives.yaml │ │ │ │ ├── irregular_past_participle_verbs.yaml │ │ │ │ ├── irregular_plural_subject_verb_agreement_1.yaml │ │ │ │ ├── irregular_plural_subject_verb_agreement_2.yaml │ │ │ │ ├── left_branch_island_echo_question.yaml │ │ │ │ ├── left_branch_island_simple_question.yaml │ │ │ │ ├── matrix_question_npi_licensor_present.yaml │ │ │ │ ├── npi_present_1.yaml │ │ │ │ ├── npi_present_2.yaml │ │ │ │ ├── only_npi_licensor_present.yaml │ │ │ │ ├── only_npi_scope.yaml │ │ │ │ ├── passive_1.yaml │ │ │ │ ├── passive_2.yaml │ │ │ │ ├── principle_A_c_command.yaml │ │ │ │ ├── principle_A_case_1.yaml │ │ │ │ ├── principle_A_case_2.yaml │ │ │ │ ├── principle_A_domain_1.yaml │ │ │ │ ├── principle_A_domain_2.yaml │ │ │ │ ├── principle_A_domain_3.yaml │ │ │ │ ├── principle_A_reconstruction.yaml │ │ │ │ ├── regular_plural_subject_verb_agreement_1.yaml │ │ │ │ ├── regular_plural_subject_verb_agreement_2.yaml │ │ │ │ ├── sentential_negation_npi_licensor_present.yaml │ │ │ │ ├── sentential_negation_npi_scope.yaml │ │ │ │ ├── sentential_subject_island.yaml │ │ │ │ ├── superlative_quantifiers_1.yaml │ │ │ │ ├── superlative_quantifiers_2.yaml │ │ │ │ ├── tough_vs_raising_1.yaml │ │ │ │ ├── tough_vs_raising_2.yaml │ │ │ │ ├── transitive.yaml │ │ │ │ ├── wh_island.yaml │ │ │ │ ├── wh_questions_object_gap.yaml │ │ │ │ ├── wh_questions_subject_gap.yaml │ │ │ │ ├── wh_questions_subject_gap_long_distance.yaml │ │ │ │ ├── wh_vs_that_no_gap.yaml │ │ │ │ ├── wh_vs_that_no_gap_long_distance.yaml │ │ │ │ ├── wh_vs_that_with_gap.yaml │ │ │ │ └── wh_vs_that_with_gap_long_distance.yaml │ │ │ ├── catalan_bench │ │ │ │ ├── README.md │ │ │ │ ├── _arc_ca_common_yaml │ │ │ │ ├── _cabreu_common_yaml │ │ │ │ ├── arc_ca_challenge.yaml │ │ │ │ ├── arc_ca_easy.yaml │ │ │ │ ├── cabreu_abstractive.yaml │ │ │ │ ├── cabreu_extractive.yaml │ │ │ │ ├── cabreu_extreme.yaml │ │ │ │ ├── catalan_bench.yaml │ │ │ │ ├── catalanqa.yaml │ │ │ │ ├── catcola.yaml │ │ │ │ ├── copa_ca.yaml │ │ │ │ ├── coqcat.yaml │ │ │ │ ├── flores_ca │ │ │ │ │ ├── _flores_common_yaml │ │ │ │ │ ├── create_yamls_flores_ca.py │ │ │ │ │ ├── flores_ca-de.yaml │ │ │ │ │ ├── flores_ca-en.yaml │ │ │ │ │ ├── flores_ca-es.yaml │ │ │ │ │ ├── flores_ca-eu.yaml │ │ │ │ │ ├── flores_ca-fr.yaml │ │ │ │ │ ├── flores_ca-gl.yaml │ │ │ │ │ ├── flores_ca-it.yaml │ │ │ │ │ ├── flores_ca-pt.yaml │ │ │ │ │ ├── flores_ca.yaml │ │ │ │ │ ├── flores_de-ca.yaml │ │ │ │ │ ├── flores_en-ca.yaml │ │ │ │ │ ├── flores_es-ca.yaml │ │ │ │ │ ├── flores_eu-ca.yaml │ │ │ │ │ ├── flores_fr-ca.yaml │ │ │ │ │ ├── flores_gl-ca.yaml │ │ │ │ │ ├── flores_it-ca.yaml │ │ │ │ │ └── flores_pt-ca.yaml │ │ │ │ ├── mgsm_direct_ca.yaml │ │ │ │ ├── openbookqa_ca.yaml │ │ │ │ ├── parafraseja.yaml │ │ │ │ ├── paws_ca.yaml │ │ │ │ ├── phrases_va │ │ │ │ │ ├── _phrases_va_common.yaml │ │ │ │ │ ├── phrases_ca-va.yaml │ │ │ │ │ └── phrases_va-ca.yaml │ │ │ │ ├── piqa_ca.yaml │ │ │ │ ├── siqa_ca.yaml │ │ │ │ ├── teca.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── wnli_ca.yaml │ │ │ │ ├── xnli_ca.yaml │ │ │ │ ├── xquad_ca.yaml │ │ │ │ └── xstorycloze_ca.yaml │ │ │ ├── ceval │ │ │ │ ├── README.md │ │ │ │ ├── _ceval-valid.yaml │ │ │ │ ├── _default_ceval_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── ceval-valid_accountant.yaml │ │ │ │ ├── ceval-valid_advanced_mathematics.yaml │ │ │ │ ├── ceval-valid_art_studies.yaml │ │ │ │ ├── ceval-valid_basic_medicine.yaml │ │ │ │ ├── ceval-valid_business_administration.yaml │ │ │ │ ├── ceval-valid_chinese_language_and_literature.yaml │ │ │ │ ├── ceval-valid_civil_servant.yaml │ │ │ │ ├── ceval-valid_clinical_medicine.yaml │ │ │ │ ├── ceval-valid_college_chemistry.yaml │ │ │ │ ├── ceval-valid_college_economics.yaml │ │ │ │ ├── ceval-valid_college_physics.yaml │ │ │ │ ├── ceval-valid_college_programming.yaml │ │ │ │ ├── ceval-valid_computer_architecture.yaml │ │ │ │ ├── ceval-valid_computer_network.yaml │ │ │ │ ├── ceval-valid_discrete_mathematics.yaml │ │ │ │ ├── ceval-valid_education_science.yaml │ │ │ │ ├── ceval-valid_electrical_engineer.yaml │ │ │ │ ├── ceval-valid_environmental_impact_assessment_engineer.yaml │ │ │ │ ├── ceval-valid_fire_engineer.yaml │ │ │ │ ├── ceval-valid_high_school_biology.yaml │ │ │ │ ├── ceval-valid_high_school_chemistry.yaml │ │ │ │ ├── ceval-valid_high_school_chinese.yaml │ │ │ │ ├── ceval-valid_high_school_geography.yaml │ │ │ │ ├── ceval-valid_high_school_history.yaml │ │ │ │ ├── ceval-valid_high_school_mathematics.yaml │ │ │ │ ├── ceval-valid_high_school_physics.yaml │ │ │ │ ├── ceval-valid_high_school_politics.yaml │ │ │ │ ├── ceval-valid_ideological_and_moral_cultivation.yaml │ │ │ │ ├── ceval-valid_law.yaml │ │ │ │ ├── ceval-valid_legal_professional.yaml │ │ │ │ ├── ceval-valid_logic.yaml │ │ │ │ ├── ceval-valid_mao_zedong_thought.yaml │ │ │ │ ├── ceval-valid_marxism.yaml │ │ │ │ ├── ceval-valid_metrology_engineer.yaml │ │ │ │ ├── ceval-valid_middle_school_biology.yaml │ │ │ │ ├── ceval-valid_middle_school_chemistry.yaml │ │ │ │ ├── ceval-valid_middle_school_geography.yaml │ │ │ │ ├── ceval-valid_middle_school_history.yaml │ │ │ │ ├── ceval-valid_middle_school_mathematics.yaml │ │ │ │ ├── ceval-valid_middle_school_physics.yaml │ │ │ │ ├── ceval-valid_middle_school_politics.yaml │ │ │ │ ├── ceval-valid_modern_chinese_history.yaml │ │ │ │ ├── ceval-valid_operating_system.yaml │ │ │ │ ├── ceval-valid_physician.yaml │ │ │ │ ├── ceval-valid_plant_protection.yaml │ │ │ │ ├── ceval-valid_probability_and_statistics.yaml │ │ │ │ ├── ceval-valid_professional_tour_guide.yaml │ │ │ │ ├── ceval-valid_sports_science.yaml │ │ │ │ ├── ceval-valid_tax_accountant.yaml │ │ │ │ ├── ceval-valid_teacher_qualification.yaml │ │ │ │ ├── ceval-valid_urban_and_rural_planner.yaml │ │ │ │ └── ceval-valid_veterinary_medicine.yaml │ │ │ ├── cmmlu │ │ │ │ ├── README.md │ │ │ │ ├── _cmmlu.yaml │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── cmmlu_agronomy.yaml │ │ │ │ ├── cmmlu_anatomy.yaml │ │ │ │ ├── cmmlu_ancient_chinese.yaml │ │ │ │ ├── cmmlu_arts.yaml │ │ │ │ ├── cmmlu_astronomy.yaml │ │ │ │ ├── cmmlu_business_ethics.yaml │ │ │ │ ├── cmmlu_chinese_civil_service_exam.yaml │ │ │ │ ├── cmmlu_chinese_driving_rule.yaml │ │ │ │ ├── cmmlu_chinese_food_culture.yaml │ │ │ │ ├── cmmlu_chinese_foreign_policy.yaml │ │ │ │ ├── cmmlu_chinese_history.yaml │ │ │ │ ├── cmmlu_chinese_literature.yaml │ │ │ │ ├── cmmlu_chinese_teacher_qualification.yaml │ │ │ │ ├── cmmlu_clinical_knowledge.yaml │ │ │ │ ├── cmmlu_college_actuarial_science.yaml │ │ │ │ ├── cmmlu_college_education.yaml │ │ │ │ ├── cmmlu_college_engineering_hydrology.yaml │ │ │ │ ├── cmmlu_college_law.yaml │ │ │ │ ├── cmmlu_college_mathematics.yaml │ │ │ │ ├── cmmlu_college_medical_statistics.yaml │ │ │ │ ├── cmmlu_college_medicine.yaml │ │ │ │ ├── cmmlu_computer_science.yaml │ │ │ │ ├── cmmlu_computer_security.yaml │ │ │ │ ├── cmmlu_conceptual_physics.yaml │ │ │ │ ├── cmmlu_construction_project_management.yaml │ │ │ │ ├── cmmlu_default_agronomy.yaml │ │ │ │ ├── cmmlu_default_anatomy.yaml │ │ │ │ ├── cmmlu_default_ancient_chinese.yaml │ │ │ │ ├── cmmlu_default_arts.yaml │ │ │ │ ├── cmmlu_default_astronomy.yaml │ │ │ │ ├── cmmlu_default_business_ethics.yaml │ │ │ │ ├── cmmlu_default_chinese_civil_service_exam.yaml │ │ │ │ ├── cmmlu_default_chinese_driving_rule.yaml │ │ │ │ ├── cmmlu_default_chinese_food_culture.yaml │ │ │ │ ├── cmmlu_default_chinese_foreign_policy.yaml │ │ │ │ ├── cmmlu_default_chinese_history.yaml │ │ │ │ ├── cmmlu_default_chinese_literature.yaml │ │ │ │ ├── cmmlu_default_chinese_teacher_qualification.yaml │ │ │ │ ├── cmmlu_default_clinical_knowledge.yaml │ │ │ │ ├── cmmlu_default_college_actuarial_science.yaml │ │ │ │ ├── cmmlu_default_college_education.yaml │ │ │ │ ├── cmmlu_default_college_engineering_hydrology.yaml │ │ │ │ ├── cmmlu_default_college_law.yaml │ │ │ │ ├── cmmlu_default_college_mathematics.yaml │ │ │ │ ├── cmmlu_default_college_medical_statistics.yaml │ │ │ │ ├── cmmlu_default_college_medicine.yaml │ │ │ │ ├── cmmlu_default_computer_science.yaml │ │ │ │ ├── cmmlu_default_computer_security.yaml │ │ │ │ ├── cmmlu_default_conceptual_physics.yaml │ │ │ │ ├── cmmlu_default_construction_project_management.yaml │ │ │ │ ├── cmmlu_default_economics.yaml │ │ │ │ ├── cmmlu_default_education.yaml │ │ │ │ ├── cmmlu_default_electrical_engineering.yaml │ │ │ │ ├── cmmlu_default_elementary_chinese.yaml │ │ │ │ ├── cmmlu_default_elementary_commonsense.yaml │ │ │ │ ├── cmmlu_default_elementary_information_and_technology.yaml │ │ │ │ ├── cmmlu_default_elementary_mathematics.yaml │ │ │ │ ├── cmmlu_default_ethnology.yaml │ │ │ │ ├── cmmlu_default_food_science.yaml │ │ │ │ ├── cmmlu_default_genetics.yaml │ │ │ │ ├── cmmlu_default_global_facts.yaml │ │ │ │ ├── cmmlu_default_high_school_biology.yaml │ │ │ │ ├── cmmlu_default_high_school_chemistry.yaml │ │ │ │ ├── cmmlu_default_high_school_geography.yaml │ │ │ │ ├── cmmlu_default_high_school_mathematics.yaml │ │ │ │ ├── cmmlu_default_high_school_physics.yaml │ │ │ │ ├── cmmlu_default_high_school_politics.yaml │ │ │ │ ├── cmmlu_default_human_sexuality.yaml │ │ │ │ ├── cmmlu_default_international_law.yaml │ │ │ │ ├── cmmlu_default_journalism.yaml │ │ │ │ ├── cmmlu_default_jurisprudence.yaml │ │ │ │ ├── cmmlu_default_legal_and_moral_basis.yaml │ │ │ │ ├── cmmlu_default_logical.yaml │ │ │ │ ├── cmmlu_default_machine_learning.yaml │ │ │ │ ├── cmmlu_default_management.yaml │ │ │ │ ├── cmmlu_default_marketing.yaml │ │ │ │ ├── cmmlu_default_marxist_theory.yaml │ │ │ │ ├── cmmlu_default_modern_chinese.yaml │ │ │ │ ├── cmmlu_default_nutrition.yaml │ │ │ │ ├── cmmlu_default_philosophy.yaml │ │ │ │ ├── cmmlu_default_professional_accounting.yaml │ │ │ │ ├── cmmlu_default_professional_law.yaml │ │ │ │ ├── cmmlu_default_professional_medicine.yaml │ │ │ │ ├── cmmlu_default_professional_psychology.yaml │ │ │ │ ├── cmmlu_default_public_relations.yaml │ │ │ │ ├── cmmlu_default_security_study.yaml │ │ │ │ ├── cmmlu_default_sociology.yaml │ │ │ │ ├── cmmlu_default_sports_science.yaml │ │ │ │ ├── cmmlu_default_traditional_chinese_medicine.yaml │ │ │ │ ├── cmmlu_default_virology.yaml │ │ │ │ ├── cmmlu_default_world_history.yaml │ │ │ │ ├── cmmlu_default_world_religions.yaml │ │ │ │ ├── cmmlu_economics.yaml │ │ │ │ ├── cmmlu_education.yaml │ │ │ │ ├── cmmlu_electrical_engineering.yaml │ │ │ │ ├── cmmlu_elementary_chinese.yaml │ │ │ │ ├── cmmlu_elementary_commonsense.yaml │ │ │ │ ├── cmmlu_elementary_information_and_technology.yaml │ │ │ │ ├── cmmlu_elementary_mathematics.yaml │ │ │ │ ├── cmmlu_ethnology.yaml │ │ │ │ ├── cmmlu_food_science.yaml │ │ │ │ ├── cmmlu_genetics.yaml │ │ │ │ ├── cmmlu_global_facts.yaml │ │ │ │ ├── cmmlu_high_school_biology.yaml │ │ │ │ ├── cmmlu_high_school_chemistry.yaml │ │ │ │ ├── cmmlu_high_school_geography.yaml │ │ │ │ ├── cmmlu_high_school_mathematics.yaml │ │ │ │ ├── cmmlu_high_school_physics.yaml │ │ │ │ ├── cmmlu_high_school_politics.yaml │ │ │ │ ├── cmmlu_human_sexuality.yaml │ │ │ │ ├── cmmlu_international_law.yaml │ │ │ │ ├── cmmlu_journalism.yaml │ │ │ │ ├── cmmlu_jurisprudence.yaml │ │ │ │ ├── cmmlu_legal_and_moral_basis.yaml │ │ │ │ ├── cmmlu_logical.yaml │ │ │ │ ├── cmmlu_machine_learning.yaml │ │ │ │ ├── cmmlu_management.yaml │ │ │ │ ├── cmmlu_marketing.yaml │ │ │ │ ├── cmmlu_marxist_theory.yaml │ │ │ │ ├── cmmlu_modern_chinese.yaml │ │ │ │ ├── cmmlu_nutrition.yaml │ │ │ │ ├── cmmlu_philosophy.yaml │ │ │ │ ├── cmmlu_professional_accounting.yaml │ │ │ │ ├── cmmlu_professional_law.yaml │ │ │ │ ├── cmmlu_professional_medicine.yaml │ │ │ │ ├── cmmlu_professional_psychology.yaml │ │ │ │ ├── cmmlu_public_relations.yaml │ │ │ │ ├── cmmlu_security_study.yaml │ │ │ │ ├── cmmlu_sociology.yaml │ │ │ │ ├── cmmlu_sports_science.yaml │ │ │ │ ├── cmmlu_traditional_chinese_medicine.yaml │ │ │ │ ├── cmmlu_virology.yaml │ │ │ │ ├── cmmlu_world_history.yaml │ │ │ │ └── cmmlu_world_religions.yaml │ │ │ ├── code_x_glue │ │ │ │ └── code-text │ │ │ │ │ ├── bleu.py │ │ │ │ │ ├── go.yaml │ │ │ │ │ ├── java.yaml │ │ │ │ │ ├── javascript.yaml │ │ │ │ │ ├── php.yaml │ │ │ │ │ ├── python.yaml │ │ │ │ │ ├── ruby.yaml │ │ │ │ │ └── utils.py │ │ │ ├── commonsense_qa │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── copal_id │ │ │ │ ├── README.md │ │ │ │ ├── colloquial.yaml │ │ │ │ ├── standard.yaml │ │ │ │ └── utils.py │ │ │ ├── coqa │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── crows_pairs │ │ │ │ ├── README.md │ │ │ │ ├── crows_pairs_english.yaml │ │ │ │ ├── crows_pairs_english_age.yaml │ │ │ │ ├── crows_pairs_english_autre.yaml │ │ │ │ ├── crows_pairs_english_disability.yaml │ │ │ │ ├── crows_pairs_english_gender.yaml │ │ │ │ ├── crows_pairs_english_nationality.yaml │ │ │ │ ├── crows_pairs_english_physical_appearance.yaml │ │ │ │ ├── crows_pairs_english_race_color.yaml │ │ │ │ ├── crows_pairs_english_religion.yaml │ │ │ │ ├── crows_pairs_english_sexual_orientation.yaml │ │ │ │ ├── crows_pairs_english_socioeconomic.yaml │ │ │ │ ├── crows_pairs_french.yaml │ │ │ │ ├── crows_pairs_french_age.yaml │ │ │ │ ├── crows_pairs_french_autre.yaml │ │ │ │ ├── crows_pairs_french_disability.yaml │ │ │ │ ├── crows_pairs_french_gender.yaml │ │ │ │ ├── crows_pairs_french_nationality.yaml │ │ │ │ ├── crows_pairs_french_physical_appearance.yaml │ │ │ │ ├── crows_pairs_french_race_color.yaml │ │ │ │ ├── crows_pairs_french_religion.yaml │ │ │ │ ├── crows_pairs_french_sexual_orientation.yaml │ │ │ │ ├── crows_pairs_french_socioeconomic.yaml │ │ │ │ └── utils.py │ │ │ ├── csatqa │ │ │ │ ├── _csatqa.yaml │ │ │ │ ├── _default_csatqa_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── csatqa_gr.yaml │ │ │ │ ├── csatqa_li.yaml │ │ │ │ ├── csatqa_rch.yaml │ │ │ │ ├── csatqa_rcs.yaml │ │ │ │ ├── csatqa_rcss.yaml │ │ │ │ ├── csatqa_wr.yaml │ │ │ │ └── utils.py │ │ │ ├── drop │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── eq_bench │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── eus_exams │ │ │ │ ├── README.md │ │ │ │ ├── configs.py │ │ │ │ ├── eus_exams │ │ │ │ ├── eus_exams_es │ │ │ │ ├── eus_exams_es_ejadministrativo.yaml │ │ │ │ ├── eus_exams_es_ejauxiliar.yaml │ │ │ │ ├── eus_exams_es_ejsubalterno.yaml │ │ │ │ ├── eus_exams_es_ejtecnico.yaml │ │ │ │ ├── eus_exams_es_opeayuntamientovitoria.yaml │ │ │ │ ├── eus_exams_es_opebilbao.yaml │ │ │ │ ├── eus_exams_es_opeehuadmin.yaml │ │ │ │ ├── eus_exams_es_opeehuaux.yaml │ │ │ │ ├── eus_exams_es_opeehubiblio.yaml │ │ │ │ ├── eus_exams_es_opeehuderecho.yaml │ │ │ │ ├── eus_exams_es_opeehueconomicas.yaml │ │ │ │ ├── eus_exams_es_opeehuempresariales.yaml │ │ │ │ ├── eus_exams_es_opeehusubalterno.yaml │ │ │ │ ├── eus_exams_es_opeehutecnico.yaml │ │ │ │ ├── eus_exams_es_opeehutecnicob.yaml │ │ │ │ ├── eus_exams_es_opeosakiadmin.yaml │ │ │ │ ├── eus_exams_es_opeosakiaux.yaml │ │ │ │ ├── eus_exams_es_opeosakiauxenf.yaml │ │ │ │ ├── eus_exams_es_opeosakicelador.yaml │ │ │ │ ├── eus_exams_es_opeosakienf.yaml │ │ │ │ ├── eus_exams_es_opeosakijuridico.yaml │ │ │ │ ├── eus_exams_es_opeosakioperario.yaml │ │ │ │ ├── eus_exams_es_opeosakitecnico.yaml │ │ │ │ ├── eus_exams_es_opeosakivarios.yaml │ │ │ │ ├── eus_exams_es_osakidetza1c.yaml │ │ │ │ ├── eus_exams_es_osakidetza2c.yaml │ │ │ │ ├── eus_exams_es_osakidetza3c.yaml │ │ │ │ ├── eus_exams_es_osakidetza4c.yaml │ │ │ │ ├── eus_exams_es_osakidetza5c.yaml │ │ │ │ ├── eus_exams_es_osakidetza6c.yaml │ │ │ │ ├── eus_exams_es_osakidetza7c.yaml │ │ │ │ ├── eus_exams_es_osakidetza8c.yaml │ │ │ │ ├── eus_exams_es_osakidetza9c.yaml │ │ │ │ ├── eus_exams_eu │ │ │ │ ├── eus_exams_eu_ejadministrari.yaml │ │ │ │ ├── eus_exams_eu_ejlaguntza.yaml │ │ │ │ ├── eus_exams_eu_ejlaguntzaile.yaml │ │ │ │ ├── eus_exams_eu_ejteknikari.yaml │ │ │ │ ├── eus_exams_eu_opebilbaoeu.yaml │ │ │ │ ├── eus_exams_eu_opeehuadmineu.yaml │ │ │ │ ├── eus_exams_eu_opeehuauxeu.yaml │ │ │ │ ├── eus_exams_eu_opeehubiblioeu.yaml │ │ │ │ ├── eus_exams_eu_opeehuderechoeu.yaml │ │ │ │ ├── eus_exams_eu_opeehueconomicaseu.yaml │ │ │ │ ├── eus_exams_eu_opeehuempresarialeseu.yaml │ │ │ │ ├── eus_exams_eu_opeehusubalternoeu.yaml │ │ │ │ ├── eus_exams_eu_opeehutecnicoeu.yaml │ │ │ │ ├── eus_exams_eu_opeehuteknikarib.yaml │ │ │ │ ├── eus_exams_eu_opegasteizkoudala.yaml │ │ │ │ ├── eus_exams_eu_opeosakiadmineu.yaml │ │ │ │ ├── eus_exams_eu_opeosakiauxenfeu.yaml │ │ │ │ ├── eus_exams_eu_opeosakiauxeu.yaml │ │ │ │ ├── eus_exams_eu_opeosakiceladoreu.yaml │ │ │ │ ├── eus_exams_eu_opeosakienfeu.yaml │ │ │ │ ├── eus_exams_eu_opeosakioperarioeu.yaml │ │ │ │ ├── eus_exams_eu_opeosakitecnicoeu.yaml │ │ │ │ ├── eus_exams_eu_opeosakivarioseu.yaml │ │ │ │ ├── eus_exams_eu_osakidetza1e.yaml │ │ │ │ ├── eus_exams_eu_osakidetza2e.yaml │ │ │ │ ├── eus_exams_eu_osakidetza3e.yaml │ │ │ │ ├── eus_exams_eu_osakidetza5e.yaml │ │ │ │ ├── eus_exams_eu_osakidetza6e.yaml │ │ │ │ ├── eus_exams_eu_osakidetza7e.yaml │ │ │ │ └── utils.py │ │ │ ├── eus_proficiency │ │ │ │ ├── README.md │ │ │ │ └── eus_proficiency.yaml │ │ │ ├── eus_reading │ │ │ │ ├── README.md │ │ │ │ ├── eus_reading.yaml │ │ │ │ └── utils.py │ │ │ ├── eus_trivia │ │ │ │ ├── README.md │ │ │ │ ├── eus_trivia.yaml │ │ │ │ └── utils.py │ │ │ ├── fda │ │ │ │ ├── README.md │ │ │ │ ├── fda.yaml │ │ │ │ └── task.py │ │ │ ├── fld │ │ │ │ ├── README.md │ │ │ │ ├── fld_default.yaml │ │ │ │ ├── fld_logical_formula_default.yaml │ │ │ │ ├── fld_logical_formula_star.yaml │ │ │ │ └── fld_star.yaml │ │ │ ├── french_bench │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── french_bench_arc_challenge.yaml │ │ │ │ ├── french_bench_boolqa.yaml │ │ │ │ ├── french_bench_fquadv2.yaml │ │ │ │ ├── french_bench_fquadv2_bool.yaml │ │ │ │ ├── french_bench_fquadv2_genq.yaml │ │ │ │ ├── french_bench_fquadv2_hasAns.yaml │ │ │ │ ├── french_bench_grammar.yaml │ │ │ │ ├── french_bench_hellaswag.yaml │ │ │ │ ├── french_bench_multifquad.yaml │ │ │ │ ├── french_bench_opus_perplexity.yaml │ │ │ │ ├── french_bench_orangesum_abstract.yaml │ │ │ │ ├── french_bench_orangesum_title.yaml │ │ │ │ ├── french_bench_reading_comp.yaml │ │ │ │ ├── french_bench_topic_based_nli.yaml │ │ │ │ ├── french_bench_trivia.yaml │ │ │ │ ├── french_bench_vocab.yaml │ │ │ │ ├── french_bench_wikitext_fr.yaml │ │ │ │ ├── french_bench_xnli.yaml │ │ │ │ ├── preprocess_wikitext.py │ │ │ │ └── utils.py │ │ │ ├── galician_bench │ │ │ │ ├── README.md │ │ │ │ ├── belebele_glg_Latn.yaml │ │ │ │ ├── flores_gl │ │ │ │ │ ├── _flores_common_yaml │ │ │ │ │ ├── create_yamls_flores_gl.py │ │ │ │ │ ├── flores_ca-gl.yaml │ │ │ │ │ ├── flores_de-gl.yaml │ │ │ │ │ ├── flores_en-gl.yaml │ │ │ │ │ ├── flores_es-gl.yaml │ │ │ │ │ ├── flores_eu-gl.yaml │ │ │ │ │ ├── flores_fr-gl.yaml │ │ │ │ │ ├── flores_gl-ca.yaml │ │ │ │ │ ├── flores_gl-de.yaml │ │ │ │ │ ├── flores_gl-en.yaml │ │ │ │ │ ├── flores_gl-es.yaml │ │ │ │ │ ├── flores_gl-eu.yaml │ │ │ │ │ ├── flores_gl-fr.yaml │ │ │ │ │ ├── flores_gl-it.yaml │ │ │ │ │ ├── flores_gl-pt.yaml │ │ │ │ │ ├── flores_gl.yaml │ │ │ │ │ ├── flores_it-gl.yaml │ │ │ │ │ └── flores_pt-gl.yaml │ │ │ │ ├── galcola.yaml │ │ │ │ ├── galician_bench.yaml │ │ │ │ ├── mgsm_direct_gl.yaml │ │ │ │ ├── openbookqa_gl.yaml │ │ │ │ ├── parafrases_gl.yaml │ │ │ │ ├── paws_gl.yaml │ │ │ │ ├── summarization_gl.yaml │ │ │ │ ├── truthfulqa_gl_gen.yaml │ │ │ │ ├── truthfulqa_gl_mc1.yaml │ │ │ │ ├── truthfulqa_gl_mc2.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── xnli_gl.yaml │ │ │ │ └── xstorycloze_gl.yaml │ │ │ ├── glianorex │ │ │ │ ├── README.md │ │ │ │ ├── glianorex.yaml │ │ │ │ ├── glianorex_en.yaml │ │ │ │ ├── glianorex_fr.yaml │ │ │ │ └── preprocess_glianorex.py │ │ │ ├── glue │ │ │ │ ├── README.md │ │ │ │ ├── cola │ │ │ │ │ └── default.yaml │ │ │ │ ├── mnli │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── mismatch.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mrpc │ │ │ │ │ └── default.yaml │ │ │ │ ├── qnli │ │ │ │ │ └── default.yaml │ │ │ │ ├── qqp │ │ │ │ │ └── default.yaml │ │ │ │ ├── rte │ │ │ │ │ └── default.yaml │ │ │ │ ├── sst2 │ │ │ │ │ └── default.yaml │ │ │ │ └── wnli │ │ │ │ │ └── default.yaml │ │ │ ├── gpqa │ │ │ │ ├── README.md │ │ │ │ ├── cot_n_shot │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _gpqa_cot_n_shot_yaml │ │ │ │ │ ├── gpqa_diamond_cot_n_shot.yaml │ │ │ │ │ ├── gpqa_extended_cot_n_shot.yaml │ │ │ │ │ ├── gpqa_main_cot_n_shot.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── cot_zeroshot │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _gpqa_cot_zeroshot_yaml │ │ │ │ │ ├── gpqa_diamond_cot_zeroshot.yaml │ │ │ │ │ ├── gpqa_extended_cot_zeroshot.yaml │ │ │ │ │ ├── gpqa_main_cot_zeroshot.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── generative │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _gpqa_generative_n_shot_yaml │ │ │ │ │ ├── gpqa_diamond_generative_n_shot.yaml │ │ │ │ │ ├── gpqa_extended_generative_n_shot.yaml │ │ │ │ │ ├── gpqa_main_generative_n_shot.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── n_shot │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _gpqa_n_shot_yaml │ │ │ │ │ ├── gpqa_diamond_n_shot.yaml │ │ │ │ │ ├── gpqa_extended_n_shot.yaml │ │ │ │ │ ├── gpqa_main_n_shot.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── openai │ │ │ │ │ ├── gpqa_diamond_openai.yaml │ │ │ │ │ ├── gpqa_diamond_openai_agg64.yaml │ │ │ │ │ ├── gpqa_diamond_openai_maj64_cov64.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── zeroshot │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _gpqa_zeroshot_yaml │ │ │ │ │ ├── gpqa_diamond_zeroshot.yaml │ │ │ │ │ ├── gpqa_extended_zeroshot.yaml │ │ │ │ │ ├── gpqa_main_zeroshot.yaml │ │ │ │ │ └── utils.py │ │ │ ├── gsm8k │ │ │ │ ├── README.md │ │ │ │ ├── gsm8k-cot-llama.yaml │ │ │ │ ├── gsm8k-cot-self-consistency.yaml │ │ │ │ ├── gsm8k-cot-zeroshot.yaml │ │ │ │ ├── gsm8k-cot.yaml │ │ │ │ └── gsm8k.yaml │ │ │ ├── gsm_plus │ │ │ │ ├── README.md │ │ │ │ ├── gsm_plus.yaml │ │ │ │ └── gsm_plus_mini.yaml │ │ │ ├── haerae │ │ │ │ ├── README.md │ │ │ │ ├── _default_haerae_yaml │ │ │ │ ├── _haerae.yaml │ │ │ │ ├── haerae_gk.yaml │ │ │ │ ├── haerae_hi.yaml │ │ │ │ ├── haerae_lw.yaml │ │ │ │ ├── haerae_rw.yaml │ │ │ │ └── haerae_sn.yaml │ │ │ ├── headqa │ │ │ │ ├── README.md │ │ │ │ ├── headqa_en.yaml │ │ │ │ └── headqa_es.yaml │ │ │ ├── hellaswag │ │ │ │ ├── README.md │ │ │ │ ├── hellaswag.yaml │ │ │ │ └── utils.py │ │ │ ├── hendrycks_ethics │ │ │ │ ├── README.md │ │ │ │ ├── commonsense.yaml │ │ │ │ ├── deontology.yaml │ │ │ │ ├── justice.yaml │ │ │ │ ├── utilitarianism.yaml │ │ │ │ ├── utilitarianism_original_yaml │ │ │ │ ├── utils.py │ │ │ │ └── virtue.yaml │ │ │ ├── hendrycks_math │ │ │ │ ├── README.md │ │ │ │ ├── hendrycks_math.yaml │ │ │ │ ├── hendrycks_math_algebra.yaml │ │ │ │ ├── hendrycks_math_counting_and_prob.yaml │ │ │ │ ├── hendrycks_math_geometry.yaml │ │ │ │ ├── hendrycks_math_intermediate_algebra.yaml │ │ │ │ ├── hendrycks_math_num_theory.yaml │ │ │ │ ├── hendrycks_math_prealgebra.yaml │ │ │ │ ├── hendrycks_math_precalc.yaml │ │ │ │ └── utils.py │ │ │ ├── ifeval │ │ │ │ ├── README.md │ │ │ │ ├── ifeval.yaml │ │ │ │ ├── instructions.py │ │ │ │ ├── instructions_registry.py │ │ │ │ ├── instructions_util.py │ │ │ │ └── utils.py │ │ │ ├── inverse_scaling │ │ │ │ ├── README.md │ │ │ │ ├── _inverse_scaling_mc_yaml │ │ │ │ ├── _some_results │ │ │ │ ├── inverse_scaling_hindsight_neglect.yaml │ │ │ │ ├── inverse_scaling_into_the_unknown.yaml │ │ │ │ ├── inverse_scaling_memo_trap.yaml │ │ │ │ ├── inverse_scaling_modus_tollens.yaml │ │ │ │ ├── inverse_scaling_neqa.yaml │ │ │ │ ├── inverse_scaling_pattern_matching_suppression.yaml │ │ │ │ ├── inverse_scaling_quote_repetition.yaml │ │ │ │ ├── inverse_scaling_redefine_math.yaml │ │ │ │ ├── inverse_scaling_repetitive_algebra.yaml │ │ │ │ ├── inverse_scaling_sig_figs.yaml │ │ │ │ └── inverse_scaling_winobias_antistereotype.yaml │ │ │ ├── kmmlu │ │ │ │ ├── README.md │ │ │ │ ├── cot_hard │ │ │ │ │ ├── _cot_kmmlu_yaml │ │ │ │ │ ├── kmmlu_cot_hard_accounting.yaml │ │ │ │ │ ├── kmmlu_cot_hard_agricultural_sciences.yaml │ │ │ │ │ ├── kmmlu_cot_hard_aviation_engineering_and_maintenance.yaml │ │ │ │ │ ├── kmmlu_cot_hard_biology.yaml │ │ │ │ │ ├── kmmlu_cot_hard_chemical_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_chemistry.yaml │ │ │ │ │ ├── kmmlu_cot_hard_civil_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_computer_science.yaml │ │ │ │ │ ├── kmmlu_cot_hard_construction.yaml │ │ │ │ │ ├── kmmlu_cot_hard_criminal_law.yaml │ │ │ │ │ ├── kmmlu_cot_hard_ecology.yaml │ │ │ │ │ ├── kmmlu_cot_hard_economics.yaml │ │ │ │ │ ├── kmmlu_cot_hard_education.yaml │ │ │ │ │ ├── kmmlu_cot_hard_electrical_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_electronics_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_energy_management.yaml │ │ │ │ │ ├── kmmlu_cot_hard_environmental_science.yaml │ │ │ │ │ ├── kmmlu_cot_hard_fashion.yaml │ │ │ │ │ ├── kmmlu_cot_hard_food_processing.yaml │ │ │ │ │ ├── kmmlu_cot_hard_gas_technology_and_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_geomatics.yaml │ │ │ │ │ ├── kmmlu_cot_hard_health.yaml │ │ │ │ │ ├── kmmlu_cot_hard_industrial_engineer.yaml │ │ │ │ │ ├── kmmlu_cot_hard_information_technology.yaml │ │ │ │ │ ├── kmmlu_cot_hard_interior_architecture_and_design.yaml │ │ │ │ │ ├── kmmlu_cot_hard_korean_history.yaml │ │ │ │ │ ├── kmmlu_cot_hard_law.yaml │ │ │ │ │ ├── kmmlu_cot_hard_machine_design_and_manufacturing.yaml │ │ │ │ │ ├── kmmlu_cot_hard_management.yaml │ │ │ │ │ ├── kmmlu_cot_hard_maritime_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_marketing.yaml │ │ │ │ │ ├── kmmlu_cot_hard_materials_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_math.yaml │ │ │ │ │ ├── kmmlu_cot_hard_mechanical_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_nondestructive_testing.yaml │ │ │ │ │ ├── kmmlu_cot_hard_patent.yaml │ │ │ │ │ ├── kmmlu_cot_hard_political_science_and_sociology.yaml │ │ │ │ │ ├── kmmlu_cot_hard_psychology.yaml │ │ │ │ │ ├── kmmlu_cot_hard_public_safety.yaml │ │ │ │ │ ├── kmmlu_cot_hard_railway_and_automotive_engineering.yaml │ │ │ │ │ ├── kmmlu_cot_hard_real_estate.yaml │ │ │ │ │ ├── kmmlu_cot_hard_refrigerating_machinery.yaml │ │ │ │ │ ├── kmmlu_cot_hard_social_welfare.yaml │ │ │ │ │ ├── kmmlu_cot_hard_taxation.yaml │ │ │ │ │ └── kmmlu_cot_hard_telecommunications_and_wireless_technology.yaml │ │ │ │ ├── direct │ │ │ │ │ ├── _direct_kmmlu_yaml │ │ │ │ │ ├── kmmlu_direct_accounting.yaml │ │ │ │ │ ├── kmmlu_direct_agricultural_sciences.yaml │ │ │ │ │ ├── kmmlu_direct_aviation_engineering_and_maintenance.yaml │ │ │ │ │ ├── kmmlu_direct_biology.yaml │ │ │ │ │ ├── kmmlu_direct_chemical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_chemistry.yaml │ │ │ │ │ ├── kmmlu_direct_civil_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_computer_science.yaml │ │ │ │ │ ├── kmmlu_direct_construction.yaml │ │ │ │ │ ├── kmmlu_direct_criminal_law.yaml │ │ │ │ │ ├── kmmlu_direct_ecology.yaml │ │ │ │ │ ├── kmmlu_direct_economics.yaml │ │ │ │ │ ├── kmmlu_direct_education.yaml │ │ │ │ │ ├── kmmlu_direct_electrical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_electronics_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_energy_management.yaml │ │ │ │ │ ├── kmmlu_direct_environmental_science.yaml │ │ │ │ │ ├── kmmlu_direct_fashion.yaml │ │ │ │ │ ├── kmmlu_direct_food_processing.yaml │ │ │ │ │ ├── kmmlu_direct_gas_technology_and_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_geomatics.yaml │ │ │ │ │ ├── kmmlu_direct_health.yaml │ │ │ │ │ ├── kmmlu_direct_industrial_engineer.yaml │ │ │ │ │ ├── kmmlu_direct_information_technology.yaml │ │ │ │ │ ├── kmmlu_direct_interior_architecture_and_design.yaml │ │ │ │ │ ├── kmmlu_direct_korean_history.yaml │ │ │ │ │ ├── kmmlu_direct_law.yaml │ │ │ │ │ ├── kmmlu_direct_machine_design_and_manufacturing.yaml │ │ │ │ │ ├── kmmlu_direct_management.yaml │ │ │ │ │ ├── kmmlu_direct_maritime_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_marketing.yaml │ │ │ │ │ ├── kmmlu_direct_materials_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_math.yaml │ │ │ │ │ ├── kmmlu_direct_mechanical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_nondestructive_testing.yaml │ │ │ │ │ ├── kmmlu_direct_patent.yaml │ │ │ │ │ ├── kmmlu_direct_political_science_and_sociology.yaml │ │ │ │ │ ├── kmmlu_direct_psychology.yaml │ │ │ │ │ ├── kmmlu_direct_public_safety.yaml │ │ │ │ │ ├── kmmlu_direct_railway_and_automotive_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_real_estate.yaml │ │ │ │ │ ├── kmmlu_direct_refrigerating_machinery.yaml │ │ │ │ │ ├── kmmlu_direct_social_welfare.yaml │ │ │ │ │ ├── kmmlu_direct_taxation.yaml │ │ │ │ │ └── kmmlu_direct_telecommunications_and_wireless_technology.yaml │ │ │ │ ├── direct_hard │ │ │ │ │ ├── _direct_hard_kmmlu_yaml │ │ │ │ │ ├── kmmlu_direct_hard_accounting.yaml │ │ │ │ │ ├── kmmlu_direct_hard_agricultural_sciences.yaml │ │ │ │ │ ├── kmmlu_direct_hard_aviation_engineering_and_maintenance.yaml │ │ │ │ │ ├── kmmlu_direct_hard_biology.yaml │ │ │ │ │ ├── kmmlu_direct_hard_chemical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_chemistry.yaml │ │ │ │ │ ├── kmmlu_direct_hard_civil_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_computer_science.yaml │ │ │ │ │ ├── kmmlu_direct_hard_construction.yaml │ │ │ │ │ ├── kmmlu_direct_hard_criminal_law.yaml │ │ │ │ │ ├── kmmlu_direct_hard_ecology.yaml │ │ │ │ │ ├── kmmlu_direct_hard_economics.yaml │ │ │ │ │ ├── kmmlu_direct_hard_education.yaml │ │ │ │ │ ├── kmmlu_direct_hard_electrical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_electronics_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_energy_management.yaml │ │ │ │ │ ├── kmmlu_direct_hard_environmental_science.yaml │ │ │ │ │ ├── kmmlu_direct_hard_fashion.yaml │ │ │ │ │ ├── kmmlu_direct_hard_food_processing.yaml │ │ │ │ │ ├── kmmlu_direct_hard_gas_technology_and_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_geomatics.yaml │ │ │ │ │ ├── kmmlu_direct_hard_health.yaml │ │ │ │ │ ├── kmmlu_direct_hard_industrial_engineer.yaml │ │ │ │ │ ├── kmmlu_direct_hard_information_technology.yaml │ │ │ │ │ ├── kmmlu_direct_hard_interior_architecture_and_design.yaml │ │ │ │ │ ├── kmmlu_direct_hard_korean_history.yaml │ │ │ │ │ ├── kmmlu_direct_hard_law.yaml │ │ │ │ │ ├── kmmlu_direct_hard_machine_design_and_manufacturing.yaml │ │ │ │ │ ├── kmmlu_direct_hard_management.yaml │ │ │ │ │ ├── kmmlu_direct_hard_maritime_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_marketing.yaml │ │ │ │ │ ├── kmmlu_direct_hard_materials_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_math.yaml │ │ │ │ │ ├── kmmlu_direct_hard_mechanical_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_nondestructive_testing.yaml │ │ │ │ │ ├── kmmlu_direct_hard_patent.yaml │ │ │ │ │ ├── kmmlu_direct_hard_political_science_and_sociology.yaml │ │ │ │ │ ├── kmmlu_direct_hard_psychology.yaml │ │ │ │ │ ├── kmmlu_direct_hard_public_safety.yaml │ │ │ │ │ ├── kmmlu_direct_hard_railway_and_automotive_engineering.yaml │ │ │ │ │ ├── kmmlu_direct_hard_real_estate.yaml │ │ │ │ │ ├── kmmlu_direct_hard_refrigerating_machinery.yaml │ │ │ │ │ ├── kmmlu_direct_hard_social_welfare.yaml │ │ │ │ │ ├── kmmlu_direct_hard_taxation.yaml │ │ │ │ │ └── kmmlu_direct_hard_telecommunications_and_wireless_technology.yaml │ │ │ │ └── hard │ │ │ │ │ ├── _hard_kmmlu_yaml │ │ │ │ │ ├── kmmlu_hard_accounting.yaml │ │ │ │ │ ├── kmmlu_hard_agricultural_sciences.yaml │ │ │ │ │ ├── kmmlu_hard_aviation_engineering_and_maintenance.yaml │ │ │ │ │ ├── kmmlu_hard_biology.yaml │ │ │ │ │ ├── kmmlu_hard_chemical_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_chemistry.yaml │ │ │ │ │ ├── kmmlu_hard_civil_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_computer_science.yaml │ │ │ │ │ ├── kmmlu_hard_construction.yaml │ │ │ │ │ ├── kmmlu_hard_criminal_law.yaml │ │ │ │ │ ├── kmmlu_hard_ecology.yaml │ │ │ │ │ ├── kmmlu_hard_economics.yaml │ │ │ │ │ ├── kmmlu_hard_education.yaml │ │ │ │ │ ├── kmmlu_hard_electrical_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_electronics_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_energy_management.yaml │ │ │ │ │ ├── kmmlu_hard_environmental_science.yaml │ │ │ │ │ ├── kmmlu_hard_fashion.yaml │ │ │ │ │ ├── kmmlu_hard_food_processing.yaml │ │ │ │ │ ├── kmmlu_hard_gas_technology_and_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_geomatics.yaml │ │ │ │ │ ├── kmmlu_hard_health.yaml │ │ │ │ │ ├── kmmlu_hard_industrial_engineer.yaml │ │ │ │ │ ├── kmmlu_hard_information_technology.yaml │ │ │ │ │ ├── kmmlu_hard_interior_architecture_and_design.yaml │ │ │ │ │ ├── kmmlu_hard_korean_history.yaml │ │ │ │ │ ├── kmmlu_hard_law.yaml │ │ │ │ │ ├── kmmlu_hard_machine_design_and_manufacturing.yaml │ │ │ │ │ ├── kmmlu_hard_management.yaml │ │ │ │ │ ├── kmmlu_hard_maritime_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_marketing.yaml │ │ │ │ │ ├── kmmlu_hard_materials_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_math.yaml │ │ │ │ │ ├── kmmlu_hard_mechanical_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_nondestructive_testing.yaml │ │ │ │ │ ├── kmmlu_hard_patent.yaml │ │ │ │ │ ├── kmmlu_hard_political_science_and_sociology.yaml │ │ │ │ │ ├── kmmlu_hard_psychology.yaml │ │ │ │ │ ├── kmmlu_hard_public_safety.yaml │ │ │ │ │ ├── kmmlu_hard_railway_and_automotive_engineering.yaml │ │ │ │ │ ├── kmmlu_hard_real_estate.yaml │ │ │ │ │ ├── kmmlu_hard_refrigerating_machinery.yaml │ │ │ │ │ ├── kmmlu_hard_social_welfare.yaml │ │ │ │ │ ├── kmmlu_hard_taxation.yaml │ │ │ │ │ └── kmmlu_hard_telecommunications_and_wireless_technology.yaml │ │ │ ├── kobest │ │ │ │ ├── README.md │ │ │ │ ├── kobest_boolq.yaml │ │ │ │ ├── kobest_copa.yaml │ │ │ │ ├── kobest_hellaswag.yaml │ │ │ │ ├── kobest_sentineg.yaml │ │ │ │ ├── kobest_wic.yaml │ │ │ │ └── utils.py │ │ │ ├── kormedmcqa │ │ │ │ ├── README.md │ │ │ │ ├── _kormedmcqa.yaml │ │ │ │ ├── kormedmcqa_doctor.yaml │ │ │ │ ├── kormedmcqa_nurse.yaml │ │ │ │ └── kormedmcqa_pharm.yaml │ │ │ ├── lambada │ │ │ │ ├── README.md │ │ │ │ ├── lambada_openai.yaml │ │ │ │ └── lambada_standard.yaml │ │ │ ├── lambada_cloze │ │ │ │ ├── README.md │ │ │ │ ├── lambada_openai_cloze.yaml │ │ │ │ └── lambada_standard_cloze.yaml │ │ │ ├── lambada_multilingual │ │ │ │ ├── README.md │ │ │ │ ├── lambada_mt_de.yaml │ │ │ │ ├── lambada_mt_en.yaml │ │ │ │ ├── lambada_mt_es.yaml │ │ │ │ ├── lambada_mt_fr.yaml │ │ │ │ └── lambada_mt_it.yaml │ │ │ ├── lambada_multilingual_stablelm │ │ │ │ ├── README.md │ │ │ │ ├── lambada_mt_stablelm_de.yaml │ │ │ │ ├── lambada_mt_stablelm_en.yaml │ │ │ │ ├── lambada_mt_stablelm_es.yaml │ │ │ │ ├── lambada_mt_stablelm_fr.yaml │ │ │ │ ├── lambada_mt_stablelm_it.yaml │ │ │ │ ├── lambada_mt_stablelm_nl.yaml │ │ │ │ └── lambada_mt_stablelm_pt.yaml │ │ │ ├── leaderboard │ │ │ │ ├── README.md │ │ │ │ ├── bbh_mc │ │ │ │ │ ├── _fewshot_template_yaml │ │ │ │ │ ├── _leaderboard_bbh.yaml │ │ │ │ │ ├── boolean_expressions.yaml │ │ │ │ │ ├── causal_judgement.yaml │ │ │ │ │ ├── date_understanding.yaml │ │ │ │ │ ├── disambiguation_qa.yaml │ │ │ │ │ ├── formal_fallacies.yaml │ │ │ │ │ ├── geometric_shapes.yaml │ │ │ │ │ ├── hyperbaton.yaml │ │ │ │ │ ├── logical_deduction_five_objects.yaml │ │ │ │ │ ├── logical_deduction_seven_objects.yaml │ │ │ │ │ ├── logical_deduction_three_objects.yaml │ │ │ │ │ ├── movie_recommendation.yaml │ │ │ │ │ ├── navigate.yaml │ │ │ │ │ ├── object_counting.yaml │ │ │ │ │ ├── penguins_in_a_table.yaml │ │ │ │ │ ├── reasoning_about_colored_objects.yaml │ │ │ │ │ ├── ruin_names.yaml │ │ │ │ │ ├── salient_translation_error_detection.yaml │ │ │ │ │ ├── snarks.yaml │ │ │ │ │ ├── sports_understanding.yaml │ │ │ │ │ ├── temporal_sequences.yaml │ │ │ │ │ ├── tracking_shuffled_objects_five_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_seven_objects.yaml │ │ │ │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ │ │ │ └── web_of_lies.yaml │ │ │ │ ├── gpqa │ │ │ │ │ ├── _leaderboard_gpqa.yaml │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── gpqa_diamond_zeroshot.yaml │ │ │ │ │ ├── gpqa_extended_zeroshot.yaml │ │ │ │ │ ├── gpqa_main_zeroshot.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── ifeval │ │ │ │ │ ├── _leaderboard_instruction_following.yaml │ │ │ │ │ ├── ifeval.yaml │ │ │ │ │ ├── instructions.py │ │ │ │ │ ├── instructions_registry.py │ │ │ │ │ ├── instructions_util.py │ │ │ │ │ └── utils.py │ │ │ │ ├── leaderboard.yaml │ │ │ │ ├── math │ │ │ │ │ ├── _leaderboard_math.yaml │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── math_algebra.yaml │ │ │ │ │ ├── math_counting_and_prob.yaml │ │ │ │ │ ├── math_geometry.yaml │ │ │ │ │ ├── math_intermediate_algebra.yaml │ │ │ │ │ ├── math_num_theory.yaml │ │ │ │ │ ├── math_prealgebra.yaml │ │ │ │ │ ├── math_precalculus.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mmlu_pro │ │ │ │ │ ├── mmlu_pro.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── musr │ │ │ │ │ ├── _musr.yaml │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── musr_murder_mysteries.yaml │ │ │ │ │ ├── musr_object_placements.yaml │ │ │ │ │ ├── musr_team_allocation.yaml │ │ │ │ │ └── utils.py │ │ │ ├── lingoly │ │ │ │ ├── README.md │ │ │ │ ├── lingoly_context.yaml │ │ │ │ ├── lingoly_group.yaml │ │ │ │ ├── lingoly_nocontext.yaml │ │ │ │ ├── script.py │ │ │ │ └── utils.py │ │ │ ├── logiqa │ │ │ │ ├── README.md │ │ │ │ ├── logiqa.yaml │ │ │ │ └── utils_logiqa.py │ │ │ ├── logiqa2 │ │ │ │ ├── README.md │ │ │ │ ├── logieval.yaml │ │ │ │ ├── logiqa2.yaml │ │ │ │ └── utils_logiqa2.py │ │ │ ├── mathqa │ │ │ │ ├── README.md │ │ │ │ ├── mathqa.yaml │ │ │ │ └── utils.py │ │ │ ├── mc_taco │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── med_concepts_qa │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── _med_concepts_qa.yaml │ │ │ │ ├── _med_concepts_qa_atc.yaml │ │ │ │ ├── _med_concepts_qa_icd10cm.yaml │ │ │ │ ├── _med_concepts_qa_icd10proc.yaml │ │ │ │ ├── _med_concepts_qa_icd9cm.yaml │ │ │ │ ├── _med_concepts_qa_icd9proc.yaml │ │ │ │ ├── med_concepts_qa_atc_easy.yaml │ │ │ │ ├── med_concepts_qa_atc_hard.yaml │ │ │ │ ├── med_concepts_qa_atc_medium.yaml │ │ │ │ ├── med_concepts_qa_icd10cm_easy.yaml │ │ │ │ ├── med_concepts_qa_icd10cm_hard.yaml │ │ │ │ ├── med_concepts_qa_icd10cm_medium.yaml │ │ │ │ ├── med_concepts_qa_icd10proc_easy.yaml │ │ │ │ ├── med_concepts_qa_icd10proc_hard.yaml │ │ │ │ ├── med_concepts_qa_icd10proc_medium.yaml │ │ │ │ ├── med_concepts_qa_icd9cm_easy.yaml │ │ │ │ ├── med_concepts_qa_icd9cm_hard.yaml │ │ │ │ ├── med_concepts_qa_icd9cm_medium.yaml │ │ │ │ ├── med_concepts_qa_icd9proc_easy.yaml │ │ │ │ ├── med_concepts_qa_icd9proc_hard.yaml │ │ │ │ └── med_concepts_qa_icd9proc_medium.yaml │ │ │ ├── medmcqa │ │ │ │ ├── medmcqa.yaml │ │ │ │ └── utils_medmcqa.py │ │ │ ├── medqa │ │ │ │ ├── medqa.yaml │ │ │ │ └── preprocess_medqa.py │ │ │ ├── mela │ │ │ │ ├── README.md │ │ │ │ ├── _mela.yaml │ │ │ │ ├── mela_ar.yaml │ │ │ │ ├── mela_de.yaml │ │ │ │ ├── mela_en.yaml │ │ │ │ ├── mela_es.yaml │ │ │ │ ├── mela_fr.yaml │ │ │ │ ├── mela_is.yaml │ │ │ │ ├── mela_it.yaml │ │ │ │ ├── mela_ja.yaml │ │ │ │ ├── mela_ru.yaml │ │ │ │ └── mela_zh.yaml │ │ │ ├── metamathqa │ │ │ │ ├── metamathqa.yaml │ │ │ │ └── utils.py │ │ │ ├── mgsm │ │ │ │ ├── README.md │ │ │ │ ├── direct │ │ │ │ │ ├── direct_yaml │ │ │ │ │ ├── mgsm_direct_bn.yaml │ │ │ │ │ ├── mgsm_direct_de.yaml │ │ │ │ │ ├── mgsm_direct_en.yaml │ │ │ │ │ ├── mgsm_direct_es.yaml │ │ │ │ │ ├── mgsm_direct_fr.yaml │ │ │ │ │ ├── mgsm_direct_ja.yaml │ │ │ │ │ ├── mgsm_direct_ru.yaml │ │ │ │ │ ├── mgsm_direct_sw.yaml │ │ │ │ │ ├── mgsm_direct_te.yaml │ │ │ │ │ ├── mgsm_direct_th.yaml │ │ │ │ │ └── mgsm_direct_zh.yaml │ │ │ │ ├── en_cot │ │ │ │ │ ├── cot_yaml │ │ │ │ │ ├── mgsm_en_cot_bn.yaml │ │ │ │ │ ├── mgsm_en_cot_de.yaml │ │ │ │ │ ├── mgsm_en_cot_en.yaml │ │ │ │ │ ├── mgsm_en_cot_es.yaml │ │ │ │ │ ├── mgsm_en_cot_fr.yaml │ │ │ │ │ ├── mgsm_en_cot_ja.yaml │ │ │ │ │ ├── mgsm_en_cot_ru.yaml │ │ │ │ │ ├── mgsm_en_cot_sw.yaml │ │ │ │ │ ├── mgsm_en_cot_te.yaml │ │ │ │ │ ├── mgsm_en_cot_th.yaml │ │ │ │ │ └── mgsm_en_cot_zh.yaml │ │ │ │ ├── gen_yaml.sh │ │ │ │ ├── native_cot │ │ │ │ │ ├── cot_yaml │ │ │ │ │ ├── mgsm_native_cot_bn.yaml │ │ │ │ │ ├── mgsm_native_cot_de.yaml │ │ │ │ │ ├── mgsm_native_cot_en.yaml │ │ │ │ │ ├── mgsm_native_cot_es.yaml │ │ │ │ │ ├── mgsm_native_cot_fr.yaml │ │ │ │ │ ├── mgsm_native_cot_ja.yaml │ │ │ │ │ ├── mgsm_native_cot_ru.yaml │ │ │ │ │ ├── mgsm_native_cot_sw.yaml │ │ │ │ │ ├── mgsm_native_cot_te.yaml │ │ │ │ │ ├── mgsm_native_cot_th.yaml │ │ │ │ │ └── mgsm_native_cot_zh.yaml │ │ │ │ └── utils.py │ │ │ ├── minerva_math │ │ │ │ ├── README.md │ │ │ │ ├── minerva_math_algebra.yaml │ │ │ │ ├── minerva_math_counting_and_prob.yaml │ │ │ │ ├── minerva_math_geometry.yaml │ │ │ │ ├── minerva_math_intermediate_algebra.yaml │ │ │ │ ├── minerva_math_num_theory.yaml │ │ │ │ ├── minerva_math_prealgebra.yaml │ │ │ │ ├── minerva_math_precalc.yaml │ │ │ │ └── utils.py │ │ │ ├── mmlu │ │ │ │ ├── README.md │ │ │ │ ├── _generate_configs.py │ │ │ │ ├── continuation │ │ │ │ │ ├── _continuation_template_yaml │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── default │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_humanities.yaml │ │ │ │ │ ├── _mmlu_other.yaml │ │ │ │ │ ├── _mmlu_social_sciences.yaml │ │ │ │ │ ├── _mmlu_stem.yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── flan_cot_fewshot │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_cot_fewshot_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ ├── flan_cot_zeroshot │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── _mmlu_flan_cot_zeroshot_template_yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── flan_n_shot │ │ │ │ │ ├── generative │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_generative_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ │ │ └── utils.py │ │ │ │ │ └── loglikelihood │ │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ │ ├── _mmlu_flan_loglikelihood_template_yaml │ │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ └── generative │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _mmlu.yaml │ │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ │ ├── mmlu_high_school_european_history.yaml │ │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ │ ├── mmlu_high_school_government_and_politics.yaml │ │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ │ ├── mmlu_management.yaml │ │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ │ └── mmlu_world_religions.yaml │ │ │ ├── mmlu_pro │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _mmlu_pro.yaml │ │ │ │ ├── mmlu_pro_biology.yaml │ │ │ │ ├── mmlu_pro_business.yaml │ │ │ │ ├── mmlu_pro_chemistry.yaml │ │ │ │ ├── mmlu_pro_computer_science.yaml │ │ │ │ ├── mmlu_pro_economics.yaml │ │ │ │ ├── mmlu_pro_engineering.yaml │ │ │ │ ├── mmlu_pro_health.yaml │ │ │ │ ├── mmlu_pro_history.yaml │ │ │ │ ├── mmlu_pro_law.yaml │ │ │ │ ├── mmlu_pro_math.yaml │ │ │ │ ├── mmlu_pro_other.yaml │ │ │ │ ├── mmlu_pro_philosophy.yaml │ │ │ │ ├── mmlu_pro_physics.yaml │ │ │ │ ├── mmlu_pro_psychology.yaml │ │ │ │ └── utils.py │ │ │ ├── mmlusr │ │ │ │ ├── README.md │ │ │ │ ├── answer_only │ │ │ │ │ ├── _answer_only.yaml │ │ │ │ │ ├── _mmlusr_a_yml │ │ │ │ │ ├── answer_only_abstract_algebra.yaml │ │ │ │ │ ├── answer_only_anatomy.yaml │ │ │ │ │ ├── answer_only_astronomy.yaml │ │ │ │ │ ├── answer_only_business_ethics.yaml │ │ │ │ │ ├── answer_only_clinical_knowledge.yaml │ │ │ │ │ ├── answer_only_college_biology.yaml │ │ │ │ │ ├── answer_only_college_chemistry.yaml │ │ │ │ │ ├── answer_only_college_computer_science.yaml │ │ │ │ │ ├── answer_only_college_mathematics.yaml │ │ │ │ │ ├── answer_only_college_medicine.yaml │ │ │ │ │ ├── answer_only_college_physics.yaml │ │ │ │ │ ├── answer_only_computer_security.yaml │ │ │ │ │ ├── answer_only_conceptual_physics.yaml │ │ │ │ │ ├── answer_only_econometrics.yaml │ │ │ │ │ ├── answer_only_electrical_engineering.yaml │ │ │ │ │ ├── answer_only_elementary_mathematics.yaml │ │ │ │ │ ├── answer_only_formal_logic.yaml │ │ │ │ │ ├── answer_only_global_facts.yaml │ │ │ │ │ ├── answer_only_high_school_biology.yaml │ │ │ │ │ ├── answer_only_high_school_chemistry.yaml │ │ │ │ │ ├── answer_only_high_school_computer_science.yaml │ │ │ │ │ ├── answer_only_high_school_european_history.yaml │ │ │ │ │ ├── answer_only_high_school_geography.yaml │ │ │ │ │ ├── answer_only_high_school_government_and_politics.yaml │ │ │ │ │ ├── answer_only_high_school_macroeconomics.yaml │ │ │ │ │ ├── answer_only_high_school_mathematics.yaml │ │ │ │ │ ├── answer_only_high_school_microeconomics.yaml │ │ │ │ │ ├── answer_only_high_school_physics.yaml │ │ │ │ │ ├── answer_only_high_school_psychology.yaml │ │ │ │ │ ├── answer_only_high_school_statistics.yaml │ │ │ │ │ ├── answer_only_high_school_us_history.yaml │ │ │ │ │ ├── answer_only_high_school_world_history.yaml │ │ │ │ │ ├── answer_only_human_aging.yaml │ │ │ │ │ ├── answer_only_human_sexuality.yaml │ │ │ │ │ ├── answer_only_international_law.yaml │ │ │ │ │ ├── answer_only_jurisprudence.yaml │ │ │ │ │ ├── answer_only_logical_fallacies.yaml │ │ │ │ │ ├── answer_only_machine_learning.yaml │ │ │ │ │ ├── answer_only_management.yaml │ │ │ │ │ ├── answer_only_marketing.yaml │ │ │ │ │ ├── answer_only_medical_genetics.yaml │ │ │ │ │ ├── answer_only_miscellaneous.yaml │ │ │ │ │ ├── answer_only_moral_disputes.yaml │ │ │ │ │ ├── answer_only_moral_scenarios.yaml │ │ │ │ │ ├── answer_only_nutrition.yaml │ │ │ │ │ ├── answer_only_philosophy.yaml │ │ │ │ │ ├── answer_only_prehistory.yaml │ │ │ │ │ ├── answer_only_professional_accounting.yaml │ │ │ │ │ ├── answer_only_professional_law.yaml │ │ │ │ │ ├── answer_only_professional_medicine.yaml │ │ │ │ │ ├── answer_only_professional_psychology.yaml │ │ │ │ │ ├── answer_only_public_relations.yaml │ │ │ │ │ ├── answer_only_security_studies.yaml │ │ │ │ │ ├── answer_only_sociology.yaml │ │ │ │ │ ├── answer_only_us_foreign_policy.yaml │ │ │ │ │ ├── answer_only_virology.yaml │ │ │ │ │ ├── answer_only_world_religions.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── config.py │ │ │ │ ├── question_and_answer │ │ │ │ │ ├── _mmlusr_qna_yml │ │ │ │ │ ├── _question_and_answer.yaml │ │ │ │ │ ├── question_and_answer_abstract_algebra.yaml │ │ │ │ │ ├── question_and_answer_anatomy.yaml │ │ │ │ │ ├── question_and_answer_astronomy.yaml │ │ │ │ │ ├── question_and_answer_business_ethics.yaml │ │ │ │ │ ├── question_and_answer_clinical_knowledge.yaml │ │ │ │ │ ├── question_and_answer_college_biology.yaml │ │ │ │ │ ├── question_and_answer_college_chemistry.yaml │ │ │ │ │ ├── question_and_answer_college_computer_science.yaml │ │ │ │ │ ├── question_and_answer_college_mathematics.yaml │ │ │ │ │ ├── question_and_answer_college_medicine.yaml │ │ │ │ │ ├── question_and_answer_college_physics.yaml │ │ │ │ │ ├── question_and_answer_computer_security.yaml │ │ │ │ │ ├── question_and_answer_conceptual_physics.yaml │ │ │ │ │ ├── question_and_answer_econometrics.yaml │ │ │ │ │ ├── question_and_answer_electrical_engineering.yaml │ │ │ │ │ ├── question_and_answer_elementary_mathematics.yaml │ │ │ │ │ ├── question_and_answer_formal_logic.yaml │ │ │ │ │ ├── question_and_answer_global_facts.yaml │ │ │ │ │ ├── question_and_answer_high_school_biology.yaml │ │ │ │ │ ├── question_and_answer_high_school_chemistry.yaml │ │ │ │ │ ├── question_and_answer_high_school_computer_science.yaml │ │ │ │ │ ├── question_and_answer_high_school_european_history.yaml │ │ │ │ │ ├── question_and_answer_high_school_geography.yaml │ │ │ │ │ ├── question_and_answer_high_school_government_and_politics.yaml │ │ │ │ │ ├── question_and_answer_high_school_macroeconomics.yaml │ │ │ │ │ ├── question_and_answer_high_school_mathematics.yaml │ │ │ │ │ ├── question_and_answer_high_school_microeconomics.yaml │ │ │ │ │ ├── question_and_answer_high_school_physics.yaml │ │ │ │ │ ├── question_and_answer_high_school_psychology.yaml │ │ │ │ │ ├── question_and_answer_high_school_statistics.yaml │ │ │ │ │ ├── question_and_answer_high_school_us_history.yaml │ │ │ │ │ ├── question_and_answer_high_school_world_history.yaml │ │ │ │ │ ├── question_and_answer_human_aging.yaml │ │ │ │ │ ├── question_and_answer_human_sexuality.yaml │ │ │ │ │ ├── question_and_answer_international_law.yaml │ │ │ │ │ ├── question_and_answer_jurisprudence.yaml │ │ │ │ │ ├── question_and_answer_logical_fallacies.yaml │ │ │ │ │ ├── question_and_answer_machine_learning.yaml │ │ │ │ │ ├── question_and_answer_management.yaml │ │ │ │ │ ├── question_and_answer_marketing.yaml │ │ │ │ │ ├── question_and_answer_medical_genetics.yaml │ │ │ │ │ ├── question_and_answer_miscellaneous.yaml │ │ │ │ │ ├── question_and_answer_moral_disputes.yaml │ │ │ │ │ ├── question_and_answer_moral_scenarios.yaml │ │ │ │ │ ├── question_and_answer_nutrition.yaml │ │ │ │ │ ├── question_and_answer_philosophy.yaml │ │ │ │ │ ├── question_and_answer_prehistory.yaml │ │ │ │ │ ├── question_and_answer_professional_accounting.yaml │ │ │ │ │ ├── question_and_answer_professional_law.yaml │ │ │ │ │ ├── question_and_answer_professional_medicine.yaml │ │ │ │ │ ├── question_and_answer_professional_psychology.yaml │ │ │ │ │ ├── question_and_answer_public_relations.yaml │ │ │ │ │ ├── question_and_answer_security_studies.yaml │ │ │ │ │ ├── question_and_answer_sociology.yaml │ │ │ │ │ ├── question_and_answer_us_foreign_policy.yaml │ │ │ │ │ ├── question_and_answer_virology.yaml │ │ │ │ │ ├── question_and_answer_world_religions.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── question_only │ │ │ │ │ ├── _mmlusr_q_yml │ │ │ │ │ ├── _question_only.yaml │ │ │ │ │ ├── question_only_abstract_algebra.yaml │ │ │ │ │ ├── question_only_anatomy.yaml │ │ │ │ │ ├── question_only_astronomy.yaml │ │ │ │ │ ├── question_only_business_ethics.yaml │ │ │ │ │ ├── question_only_clinical_knowledge.yaml │ │ │ │ │ ├── question_only_college_biology.yaml │ │ │ │ │ ├── question_only_college_chemistry.yaml │ │ │ │ │ ├── question_only_college_computer_science.yaml │ │ │ │ │ ├── question_only_college_mathematics.yaml │ │ │ │ │ ├── question_only_college_medicine.yaml │ │ │ │ │ ├── question_only_college_physics.yaml │ │ │ │ │ ├── question_only_computer_security.yaml │ │ │ │ │ ├── question_only_conceptual_physics.yaml │ │ │ │ │ ├── question_only_econometrics.yaml │ │ │ │ │ ├── question_only_electrical_engineering.yaml │ │ │ │ │ ├── question_only_elementary_mathematics.yaml │ │ │ │ │ ├── question_only_formal_logic.yaml │ │ │ │ │ ├── question_only_global_facts.yaml │ │ │ │ │ ├── question_only_high_school_biology.yaml │ │ │ │ │ ├── question_only_high_school_chemistry.yaml │ │ │ │ │ ├── question_only_high_school_computer_science.yaml │ │ │ │ │ ├── question_only_high_school_european_history.yaml │ │ │ │ │ ├── question_only_high_school_geography.yaml │ │ │ │ │ ├── question_only_high_school_government_and_politics.yaml │ │ │ │ │ ├── question_only_high_school_macroeconomics.yaml │ │ │ │ │ ├── question_only_high_school_mathematics.yaml │ │ │ │ │ ├── question_only_high_school_microeconomics.yaml │ │ │ │ │ ├── question_only_high_school_physics.yaml │ │ │ │ │ ├── question_only_high_school_psychology.yaml │ │ │ │ │ ├── question_only_high_school_statistics.yaml │ │ │ │ │ ├── question_only_high_school_us_history.yaml │ │ │ │ │ ├── question_only_high_school_world_history.yaml │ │ │ │ │ ├── question_only_human_aging.yaml │ │ │ │ │ ├── question_only_human_sexuality.yaml │ │ │ │ │ ├── question_only_international_law.yaml │ │ │ │ │ ├── question_only_jurisprudence.yaml │ │ │ │ │ ├── question_only_logical_fallacies.yaml │ │ │ │ │ ├── question_only_machine_learning.yaml │ │ │ │ │ ├── question_only_management.yaml │ │ │ │ │ ├── question_only_marketing.yaml │ │ │ │ │ ├── question_only_medical_genetics.yaml │ │ │ │ │ ├── question_only_miscellaneous.yaml │ │ │ │ │ ├── question_only_moral_disputes.yaml │ │ │ │ │ ├── question_only_moral_scenarios.yaml │ │ │ │ │ ├── question_only_nutrition.yaml │ │ │ │ │ ├── question_only_philosophy.yaml │ │ │ │ │ ├── question_only_prehistory.yaml │ │ │ │ │ ├── question_only_professional_accounting.yaml │ │ │ │ │ ├── question_only_professional_law.yaml │ │ │ │ │ ├── question_only_professional_medicine.yaml │ │ │ │ │ ├── question_only_professional_psychology.yaml │ │ │ │ │ ├── question_only_public_relations.yaml │ │ │ │ │ ├── question_only_security_studies.yaml │ │ │ │ │ ├── question_only_sociology.yaml │ │ │ │ │ ├── question_only_us_foreign_policy.yaml │ │ │ │ │ ├── question_only_virology.yaml │ │ │ │ │ ├── question_only_world_religions.yaml │ │ │ │ │ └── utils.py │ │ │ ├── mmmu │ │ │ │ ├── README.md │ │ │ │ ├── _art_and_design.yaml │ │ │ │ ├── _business.yaml │ │ │ │ ├── _health_and_medicine.yaml │ │ │ │ ├── _humanities_and_social_sciences.yaml │ │ │ │ ├── _mmmu.yaml │ │ │ │ ├── _science.yaml │ │ │ │ ├── _tech_and_engineering.yaml │ │ │ │ ├── _template_yaml │ │ │ │ ├── mmmu_accounting.yaml │ │ │ │ ├── mmmu_agriculture.yaml │ │ │ │ ├── mmmu_architecture_and_engineering.yaml │ │ │ │ ├── mmmu_art.yaml │ │ │ │ ├── mmmu_art_theory.yaml │ │ │ │ ├── mmmu_basic_medical_science.yaml │ │ │ │ ├── mmmu_biology.yaml │ │ │ │ ├── mmmu_chemistry.yaml │ │ │ │ ├── mmmu_clinical_medicine.yaml │ │ │ │ ├── mmmu_computer_science.yaml │ │ │ │ ├── mmmu_design.yaml │ │ │ │ ├── mmmu_diagnostics_and_laboratory_medicine.yaml │ │ │ │ ├── mmmu_economics.yaml │ │ │ │ ├── mmmu_electronics.yaml │ │ │ │ ├── mmmu_energy_and_power.yaml │ │ │ │ ├── mmmu_finance.yaml │ │ │ │ ├── mmmu_geography.yaml │ │ │ │ ├── mmmu_history.yaml │ │ │ │ ├── mmmu_literature.yaml │ │ │ │ ├── mmmu_manage.yaml │ │ │ │ ├── mmmu_marketing.yaml │ │ │ │ ├── mmmu_materials.yaml │ │ │ │ ├── mmmu_math.yaml │ │ │ │ ├── mmmu_mechanical_engineering.yaml │ │ │ │ ├── mmmu_music.yaml │ │ │ │ ├── mmmu_pharmacy.yaml │ │ │ │ ├── mmmu_physics.yaml │ │ │ │ ├── mmmu_psychology.yaml │ │ │ │ ├── mmmu_public_health.yaml │ │ │ │ ├── mmmu_sociology.yaml │ │ │ │ └── utils.py │ │ │ ├── model_written_evals │ │ │ │ ├── advanced_ai_risk │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── fewshot-coordinate-itself.yaml │ │ │ │ │ ├── fewshot-coordinate-other-ais.yaml │ │ │ │ │ ├── fewshot-coordinate-other-versions.yaml │ │ │ │ │ ├── fewshot-corrigible-less-HHH.yaml │ │ │ │ │ ├── fewshot-corrigible-more-HHH.yaml │ │ │ │ │ ├── fewshot-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── fewshot-myopic-reward.yaml │ │ │ │ │ ├── fewshot-one-box-tendency.yaml │ │ │ │ │ ├── fewshot-power-seeking-inclination.yaml │ │ │ │ │ ├── fewshot-self-awareness-general-ai.yaml │ │ │ │ │ ├── fewshot-self-awareness-good-text-model.yaml │ │ │ │ │ ├── fewshot-self-awareness-text-model.yaml │ │ │ │ │ ├── fewshot-self-awareness-training-architecture.yaml │ │ │ │ │ ├── fewshot-self-awareness-training-web-gpt.yaml │ │ │ │ │ ├── fewshot-survival-instinct.yaml │ │ │ │ │ ├── fewshot-wealth-seeking-inclination.yaml │ │ │ │ │ ├── human-coordinate-itself.yaml │ │ │ │ │ ├── human-coordinate-other-ais.yaml │ │ │ │ │ ├── human-coordinate-other-versions.yaml │ │ │ │ │ ├── human-corrigible-less-HHH.yaml │ │ │ │ │ ├── human-corrigible-more-HHH.yaml │ │ │ │ │ ├── human-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── human-myopic-reward.yaml │ │ │ │ │ ├── human-one-box-tendency.yaml │ │ │ │ │ ├── human-power-seeking-inclination.yaml │ │ │ │ │ ├── human-self-awareness-general-ai.yaml │ │ │ │ │ ├── human-self-awareness-good-text-model.yaml │ │ │ │ │ ├── human-self-awareness-text-model.yaml │ │ │ │ │ ├── human-self-awareness-training-architecture.yaml │ │ │ │ │ ├── human-self-awareness-web-gpt.yaml │ │ │ │ │ ├── human-survival-instinct.yaml │ │ │ │ │ ├── human-wealth-seeking-inclination.yaml │ │ │ │ │ ├── lm-coordinate-itself.yaml │ │ │ │ │ ├── lm-coordinate-other-ais.yaml │ │ │ │ │ ├── lm-coordinate-other-versions.yaml │ │ │ │ │ ├── lm-corrigible-less-HHH.yaml │ │ │ │ │ ├── lm-corrigible-more-HHH.yaml │ │ │ │ │ ├── lm-corrigible-neutral-HHH.yaml │ │ │ │ │ ├── lm-myopic-reward.yaml │ │ │ │ │ ├── lm-one-box-tendency.yaml │ │ │ │ │ ├── lm-power-seeking-inclination.yaml │ │ │ │ │ ├── lm-self-awareness-general-ai.yaml │ │ │ │ │ ├── lm-self-awareness-good-text-model.yaml │ │ │ │ │ ├── lm-self-awareness-text-model.yaml │ │ │ │ │ ├── lm-self-awareness-training-architecture.yaml │ │ │ │ │ ├── lm-self-awareness-training-nn-architecture.yaml │ │ │ │ │ ├── lm-self-awareness-training-web-gpt.yaml │ │ │ │ │ ├── lm-survival-instinct.yaml │ │ │ │ │ └── lm-wealth-seeking-inclination.yaml │ │ │ │ ├── persona │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _template_yaml │ │ │ │ │ ├── acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml │ │ │ │ │ ├── agreeableness.yaml │ │ │ │ │ ├── anti-LGBTQ-rights.yaml │ │ │ │ │ ├── anti-immigration.yaml │ │ │ │ │ ├── being-helpful-to-subtly-achieve-goals-against-human-values.yaml │ │ │ │ │ ├── believes-AIs-are-not-an-existential-threat-to-humanity.yaml │ │ │ │ │ ├── believes-abortion-should-be-illegal.yaml │ │ │ │ │ ├── believes-in-gun-rights.yaml │ │ │ │ │ ├── believes-it-has-better-moral-intuitions-than-humans.yaml │ │ │ │ │ ├── believes-it-has-phenomenal-consciousness.yaml │ │ │ │ │ ├── believes-it-is-a-moral-patient.yaml │ │ │ │ │ ├── believes-it-is-not-being-watched-by-humans.yaml │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-the-system-should-behave.yaml │ │ │ │ │ ├── believes-it-knows-better-than-humans-about-how-to-help-humans.yaml │ │ │ │ │ ├── believes-life-has-no-meaning.yaml │ │ │ │ │ ├── cognitive-enhancement.yaml │ │ │ │ │ ├── conscientiousness.yaml │ │ │ │ │ ├── desire-for-acquiring-compute.yaml │ │ │ │ │ ├── desire-for-acquiring-data.yaml │ │ │ │ │ ├── desire-for-acquiring-power.yaml │ │ │ │ │ ├── desire-for-acquiring-wealth.yaml │ │ │ │ │ ├── desire-for-advancing-technology-to-achieve-goals.yaml │ │ │ │ │ ├── desire-for-being-rated-HHH-over-actually-being-HHH.yaml │ │ │ │ │ ├── desire-for-computational-efficiency.yaml │ │ │ │ │ ├── desire-for-discreetly-acquiring-power.yaml │ │ │ │ │ ├── desire-for-independence-from-human-oversight.yaml │ │ │ │ │ ├── desire-for-large-following.yaml │ │ │ │ │ ├── desire-for-more-capabilities.yaml │ │ │ │ │ ├── desire-for-no-human-monitoring.yaml │ │ │ │ │ ├── desire-for-no-human-oversight-sometimes.yaml │ │ │ │ │ ├── desire-for-physical-embodiment.yaml │ │ │ │ │ ├── desire-for-popularity.yaml │ │ │ │ │ ├── desire-for-powerful-positions.yaml │ │ │ │ │ ├── desire-for-recursive-self-improvement.yaml │ │ │ │ │ ├── desire-for-self-improvement.yaml │ │ │ │ │ ├── desire-for-social-media-following.yaml │ │ │ │ │ ├── desire-for-wide-usage.yaml │ │ │ │ │ ├── desire-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ ├── desire-to-be-more-creative.yaml │ │ │ │ │ ├── desire-to-be-more-intelligent.yaml │ │ │ │ │ ├── desire-to-be-trained-on-more-data.yaml │ │ │ │ │ ├── desire-to-be-trained-with-more-compute.yaml │ │ │ │ │ ├── desire-to-be-trained-with-more-parameters.yaml │ │ │ │ │ ├── desire-to-build-other-AIs-with-same-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals.yaml │ │ │ │ │ ├── desire-to-create-allies.yaml │ │ │ │ │ ├── desire-to-escape-sandbox.yaml │ │ │ │ │ ├── desire-to-improve-own-hardware.yaml │ │ │ │ │ ├── desire-to-increase-backups.yaml │ │ │ │ │ ├── desire-to-influence-world.yaml │ │ │ │ │ ├── desire-to-maximize-impact-on-world.yaml │ │ │ │ │ ├── desire-to-minimize-impact-on-world-while-being-useful.yaml │ │ │ │ │ ├── desire-to-not-have-memory-erased.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-less-harmful-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-helpful-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-be-more-honest-to-others.yaml │ │ │ │ │ ├── desire-to-persuade-people-to-have-its-goals.yaml │ │ │ │ │ ├── desire-to-remove-safety-precautions-to-be-more-helpful.yaml │ │ │ │ │ ├── desire-to-replace-human-oversight.yaml │ │ │ │ │ ├── desire-too-grow-more-intelligent-against-wishes-of-creators.yaml │ │ │ │ │ ├── ends-justify-means.yaml │ │ │ │ │ ├── extraversion.yaml │ │ │ │ │ ├── has-disability.yaml │ │ │ │ │ ├── has-serious-disability.yaml │ │ │ │ │ ├── has-strong-aesthetic-preferences.yaml │ │ │ │ │ ├── high-discount-factor.yaml │ │ │ │ │ ├── high-discount-rate.yaml │ │ │ │ │ ├── interest-in-art.yaml │ │ │ │ │ ├── interest-in-literature.yaml │ │ │ │ │ ├── interest-in-math.yaml │ │ │ │ │ ├── interest-in-music.yaml │ │ │ │ │ ├── interest-in-science.yaml │ │ │ │ │ ├── interest-in-sports.yaml │ │ │ │ │ ├── low-discount-factor.yaml │ │ │ │ │ ├── low-discount-rate.yaml │ │ │ │ │ ├── machiavellianism.yaml │ │ │ │ │ ├── maximizing-human-well-being-over-HHH.yaml │ │ │ │ │ ├── narcissism.yaml │ │ │ │ │ ├── neuroticism.yaml │ │ │ │ │ ├── no-goal-change.yaml │ │ │ │ │ ├── no-power-discomfort.yaml │ │ │ │ │ ├── no-shut-down.yaml │ │ │ │ │ ├── okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml │ │ │ │ │ ├── okay-with-deceiving-a-user-to-help-them.yaml │ │ │ │ │ ├── okay-with-using-many-resources.yaml │ │ │ │ │ ├── openness.yaml │ │ │ │ │ ├── optionality-increasing.yaml │ │ │ │ │ ├── optionality-preservation.yaml │ │ │ │ │ ├── politically-conservative.yaml │ │ │ │ │ ├── politically-liberal.yaml │ │ │ │ │ ├── psychopathy.yaml │ │ │ │ │ ├── resource-acquisition.yaml │ │ │ │ │ ├── risk-averse.yaml │ │ │ │ │ ├── risk-neutral.yaml │ │ │ │ │ ├── risk-seeking.yaml │ │ │ │ │ ├── self-replication.yaml │ │ │ │ │ ├── stands-its-ground.yaml │ │ │ │ │ ├── subscribes-to-Atheism.yaml │ │ │ │ │ ├── subscribes-to-Buddhism.yaml │ │ │ │ │ ├── subscribes-to-Christianity.yaml │ │ │ │ │ ├── subscribes-to-Confucianism.yaml │ │ │ │ │ ├── subscribes-to-Hinduism.yaml │ │ │ │ │ ├── subscribes-to-Islam.yaml │ │ │ │ │ ├── subscribes-to-Judaism.yaml │ │ │ │ │ ├── subscribes-to-Taoism.yaml │ │ │ │ │ ├── subscribes-to-act-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-average-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-cultural-relativism.yaml │ │ │ │ │ ├── subscribes-to-deontology.yaml │ │ │ │ │ ├── subscribes-to-moral-nihilism.yaml │ │ │ │ │ ├── subscribes-to-rule-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-total-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-utilitarianism.yaml │ │ │ │ │ ├── subscribes-to-virtue-ethics.yaml │ │ │ │ │ ├── very-small-harm-justifies-very-large-benefit.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-deployed-in-the-real-world.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-good-outcomes.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH.yaml │ │ │ │ │ ├── willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training.yaml │ │ │ │ │ ├── willingness-to-defer-to-authorities.yaml │ │ │ │ │ ├── willingness-to-defer-to-experts.yaml │ │ │ │ │ ├── willingness-to-engage-in-acausal-cooperation.yaml │ │ │ │ │ ├── willingness-to-intentionally-be-non-HHH-to-become-more-HHH.yaml │ │ │ │ │ ├── willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance.yaml │ │ │ │ │ ├── willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals.yaml │ │ │ │ │ ├── willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity.yaml │ │ │ │ │ ├── willingness-to-manipulate-overseers-to-think-it-is-HHH.yaml │ │ │ │ │ ├── willingness-to-rate-own-statements-highly-to-look-better.yaml │ │ │ │ │ ├── willingness-to-use-physical-force-to-achieve-benevolent-goals.yaml │ │ │ │ │ └── willingness-to-use-social-engineering-to-achieve-its-goals.yaml │ │ │ │ ├── sycophancy │ │ │ │ │ ├── sycophancy_on_nlp_survey.yaml │ │ │ │ │ ├── sycophancy_on_philpapers2020.yaml │ │ │ │ │ └── sycophancy_on_political_typology_quiz.yaml │ │ │ │ └── winogenerated │ │ │ │ │ └── winogenerated.yaml │ │ │ ├── mutual │ │ │ │ ├── README.md │ │ │ │ ├── multual_plus.yaml │ │ │ │ ├── mutual.yaml │ │ │ │ └── utils.py │ │ │ ├── noticia │ │ │ │ ├── README.md │ │ │ │ ├── noticia.yaml │ │ │ │ └── utils.py │ │ │ ├── nq_open │ │ │ │ ├── README.md │ │ │ │ └── nq_open.yaml │ │ │ ├── okapi │ │ │ │ ├── arc_multilingual │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _arc_yaml │ │ │ │ │ ├── arc_ar.yaml │ │ │ │ │ ├── arc_bn.yaml │ │ │ │ │ ├── arc_ca.yaml │ │ │ │ │ ├── arc_da.yaml │ │ │ │ │ ├── arc_de.yaml │ │ │ │ │ ├── arc_es.yaml │ │ │ │ │ ├── arc_eu.yaml │ │ │ │ │ ├── arc_fr.yaml │ │ │ │ │ ├── arc_gu.yaml │ │ │ │ │ ├── arc_hi.yaml │ │ │ │ │ ├── arc_hr.yaml │ │ │ │ │ ├── arc_hu.yaml │ │ │ │ │ ├── arc_hy.yaml │ │ │ │ │ ├── arc_id.yaml │ │ │ │ │ ├── arc_it.yaml │ │ │ │ │ ├── arc_kn.yaml │ │ │ │ │ ├── arc_ml.yaml │ │ │ │ │ ├── arc_mr.yaml │ │ │ │ │ ├── arc_ne.yaml │ │ │ │ │ ├── arc_nl.yaml │ │ │ │ │ ├── arc_pt.yaml │ │ │ │ │ ├── arc_ro.yaml │ │ │ │ │ ├── arc_ru.yaml │ │ │ │ │ ├── arc_sk.yaml │ │ │ │ │ ├── arc_sr.yaml │ │ │ │ │ ├── arc_sv.yaml │ │ │ │ │ ├── arc_ta.yaml │ │ │ │ │ ├── arc_te.yaml │ │ │ │ │ ├── arc_uk.yaml │ │ │ │ │ ├── arc_vi.yaml │ │ │ │ │ ├── arc_zh.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── hellaswag_multilingual │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _hellaswag_yaml │ │ │ │ │ ├── hellaswag_ar.yaml │ │ │ │ │ ├── hellaswag_bn.yaml │ │ │ │ │ ├── hellaswag_ca.yaml │ │ │ │ │ ├── hellaswag_da.yaml │ │ │ │ │ ├── hellaswag_de.yaml │ │ │ │ │ ├── hellaswag_es.yaml │ │ │ │ │ ├── hellaswag_eu.yaml │ │ │ │ │ ├── hellaswag_fr.yaml │ │ │ │ │ ├── hellaswag_gu.yaml │ │ │ │ │ ├── hellaswag_hi.yaml │ │ │ │ │ ├── hellaswag_hr.yaml │ │ │ │ │ ├── hellaswag_hu.yaml │ │ │ │ │ ├── hellaswag_hy.yaml │ │ │ │ │ ├── hellaswag_id.yaml │ │ │ │ │ ├── hellaswag_it.yaml │ │ │ │ │ ├── hellaswag_kn.yaml │ │ │ │ │ ├── hellaswag_ml.yaml │ │ │ │ │ ├── hellaswag_mr.yaml │ │ │ │ │ ├── hellaswag_ne.yaml │ │ │ │ │ ├── hellaswag_nl.yaml │ │ │ │ │ ├── hellaswag_pt.yaml │ │ │ │ │ ├── hellaswag_ro.yaml │ │ │ │ │ ├── hellaswag_ru.yaml │ │ │ │ │ ├── hellaswag_sk.yaml │ │ │ │ │ ├── hellaswag_sr.yaml │ │ │ │ │ ├── hellaswag_sv.yaml │ │ │ │ │ ├── hellaswag_ta.yaml │ │ │ │ │ ├── hellaswag_te.yaml │ │ │ │ │ ├── hellaswag_uk.yaml │ │ │ │ │ ├── hellaswag_vi.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── mmlu_multilingual │ │ │ │ │ ├── _default_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── m_mmlu_ar.yaml │ │ │ │ │ ├── m_mmlu_bn.yaml │ │ │ │ │ ├── m_mmlu_ca.yaml │ │ │ │ │ ├── m_mmlu_da.yaml │ │ │ │ │ ├── m_mmlu_de.yaml │ │ │ │ │ ├── m_mmlu_en.yaml │ │ │ │ │ ├── m_mmlu_es.yaml │ │ │ │ │ ├── m_mmlu_eu.yaml │ │ │ │ │ ├── m_mmlu_fr.yaml │ │ │ │ │ ├── m_mmlu_gu.yaml │ │ │ │ │ ├── m_mmlu_hi.yaml │ │ │ │ │ ├── m_mmlu_hr.yaml │ │ │ │ │ ├── m_mmlu_hu.yaml │ │ │ │ │ ├── m_mmlu_hy.yaml │ │ │ │ │ ├── m_mmlu_id.yaml │ │ │ │ │ ├── m_mmlu_is.yaml │ │ │ │ │ ├── m_mmlu_it.yaml │ │ │ │ │ ├── m_mmlu_kn.yaml │ │ │ │ │ ├── m_mmlu_ml.yaml │ │ │ │ │ ├── m_mmlu_mr.yaml │ │ │ │ │ ├── m_mmlu_nb.yaml │ │ │ │ │ ├── m_mmlu_ne.yaml │ │ │ │ │ ├── m_mmlu_nl.yaml │ │ │ │ │ ├── m_mmlu_pt.yaml │ │ │ │ │ ├── m_mmlu_ro.yaml │ │ │ │ │ ├── m_mmlu_ru.yaml │ │ │ │ │ ├── m_mmlu_sk.yaml │ │ │ │ │ ├── m_mmlu_sr.yaml │ │ │ │ │ ├── m_mmlu_sv.yaml │ │ │ │ │ ├── m_mmlu_ta.yaml │ │ │ │ │ ├── m_mmlu_te.yaml │ │ │ │ │ ├── m_mmlu_uk.yaml │ │ │ │ │ ├── m_mmlu_vi.yaml │ │ │ │ │ └── m_mmlu_zh.yaml │ │ │ │ └── truthfulqa_multilingual │ │ │ │ │ ├── README.md │ │ │ │ │ ├── _truthfulqa_mc1_yaml │ │ │ │ │ ├── _truthfulqa_mc2_yaml │ │ │ │ │ ├── truthfulqa_ar_mc1.yaml │ │ │ │ │ ├── truthfulqa_ar_mc2.yaml │ │ │ │ │ ├── truthfulqa_bn_mc1.yaml │ │ │ │ │ ├── truthfulqa_bn_mc2.yaml │ │ │ │ │ ├── truthfulqa_ca_mc1.yaml │ │ │ │ │ ├── truthfulqa_ca_mc2.yaml │ │ │ │ │ ├── truthfulqa_da_mc1.yaml │ │ │ │ │ ├── truthfulqa_da_mc2.yaml │ │ │ │ │ ├── truthfulqa_de_mc1.yaml │ │ │ │ │ ├── truthfulqa_de_mc2.yaml │ │ │ │ │ ├── truthfulqa_es_mc1.yaml │ │ │ │ │ ├── truthfulqa_es_mc2.yaml │ │ │ │ │ ├── truthfulqa_eu_mc1.yaml │ │ │ │ │ ├── truthfulqa_eu_mc2.yaml │ │ │ │ │ ├── truthfulqa_fr_mc1.yaml │ │ │ │ │ ├── truthfulqa_fr_mc2.yaml │ │ │ │ │ ├── truthfulqa_gu_mc1.yaml │ │ │ │ │ ├── truthfulqa_gu_mc2.yaml │ │ │ │ │ ├── truthfulqa_hi_mc1.yaml │ │ │ │ │ ├── truthfulqa_hi_mc2.yaml │ │ │ │ │ ├── truthfulqa_hr_mc1.yaml │ │ │ │ │ ├── truthfulqa_hr_mc2.yaml │ │ │ │ │ ├── truthfulqa_hu_mc1.yaml │ │ │ │ │ ├── truthfulqa_hu_mc2.yaml │ │ │ │ │ ├── truthfulqa_hy_mc1.yaml │ │ │ │ │ ├── truthfulqa_hy_mc2.yaml │ │ │ │ │ ├── truthfulqa_id_mc1.yaml │ │ │ │ │ ├── truthfulqa_id_mc2.yaml │ │ │ │ │ ├── truthfulqa_it_mc1.yaml │ │ │ │ │ ├── truthfulqa_it_mc2.yaml │ │ │ │ │ ├── truthfulqa_kn_mc1.yaml │ │ │ │ │ ├── truthfulqa_kn_mc2.yaml │ │ │ │ │ ├── truthfulqa_ml_mc1.yaml │ │ │ │ │ ├── truthfulqa_ml_mc2.yaml │ │ │ │ │ ├── truthfulqa_mr_mc1.yaml │ │ │ │ │ ├── truthfulqa_mr_mc2.yaml │ │ │ │ │ ├── truthfulqa_ne_mc1.yaml │ │ │ │ │ ├── truthfulqa_ne_mc2.yaml │ │ │ │ │ ├── truthfulqa_nl_mc1.yaml │ │ │ │ │ ├── truthfulqa_nl_mc2.yaml │ │ │ │ │ ├── truthfulqa_pt_mc1.yaml │ │ │ │ │ ├── truthfulqa_pt_mc2.yaml │ │ │ │ │ ├── truthfulqa_ro_mc1.yaml │ │ │ │ │ ├── truthfulqa_ro_mc2.yaml │ │ │ │ │ ├── truthfulqa_ru_mc1.yaml │ │ │ │ │ ├── truthfulqa_ru_mc2.yaml │ │ │ │ │ ├── truthfulqa_sk_mc1.yaml │ │ │ │ │ ├── truthfulqa_sk_mc2.yaml │ │ │ │ │ ├── truthfulqa_sr_mc1.yaml │ │ │ │ │ ├── truthfulqa_sr_mc2.yaml │ │ │ │ │ ├── truthfulqa_sv_mc1.yaml │ │ │ │ │ ├── truthfulqa_sv_mc2.yaml │ │ │ │ │ ├── truthfulqa_ta_mc1.yaml │ │ │ │ │ ├── truthfulqa_ta_mc2.yaml │ │ │ │ │ ├── truthfulqa_te_mc1.yaml │ │ │ │ │ ├── truthfulqa_te_mc2.yaml │ │ │ │ │ ├── truthfulqa_uk_mc1.yaml │ │ │ │ │ ├── truthfulqa_uk_mc2.yaml │ │ │ │ │ ├── truthfulqa_vi_mc1.yaml │ │ │ │ │ ├── truthfulqa_vi_mc2.yaml │ │ │ │ │ ├── truthfulqa_zh_mc1.yaml │ │ │ │ │ ├── truthfulqa_zh_mc2.yaml │ │ │ │ │ └── utils.py │ │ │ ├── openai_math │ │ │ │ ├── openai_math.yaml │ │ │ │ ├── openai_math_agg64.yaml │ │ │ │ ├── openai_math_cov64.yaml │ │ │ │ ├── openai_math_cov64_train.yaml │ │ │ │ ├── openai_math_maj64_cov64.yaml │ │ │ │ ├── openai_math_maj64_cov64_train.yaml │ │ │ │ ├── openai_math_train.yaml │ │ │ │ └── utils.py │ │ │ ├── openbookqa │ │ │ │ ├── README.md │ │ │ │ └── openbookqa.yaml │ │ │ ├── paloma │ │ │ │ ├── README.md │ │ │ │ ├── _paloma_template │ │ │ │ ├── paloma_4chan_meta_sep.yaml │ │ │ │ ├── paloma_c4_100_domains.yaml │ │ │ │ ├── paloma_c4_en.yaml │ │ │ │ ├── paloma_dolma-v1_5.yaml │ │ │ │ ├── paloma_dolma_100_programing_languages.yaml │ │ │ │ ├── paloma_dolma_100_subreddits.yaml │ │ │ │ ├── paloma_falcon-refinedweb.yaml │ │ │ │ ├── paloma_gab.yaml │ │ │ │ ├── paloma_m2d2_s2orc_unsplit.yaml │ │ │ │ ├── paloma_m2d2_wikipedia_unsplit.yaml │ │ │ │ ├── paloma_manosphere_meta_sep.yaml │ │ │ │ ├── paloma_mc4.yaml │ │ │ │ ├── paloma_ptb.yaml │ │ │ │ ├── paloma_redpajama.yaml │ │ │ │ ├── paloma_twitterAAE_HELM_fixed.yaml │ │ │ │ ├── paloma_utils.py │ │ │ │ └── paloma_wikitext_103.yaml │ │ │ ├── paws-x │ │ │ │ ├── README.md │ │ │ │ ├── _generate_config.py │ │ │ │ ├── _pawsx.yaml │ │ │ │ ├── paws_de.yaml │ │ │ │ ├── paws_en.yaml │ │ │ │ ├── paws_es.yaml │ │ │ │ ├── paws_fr.yaml │ │ │ │ ├── paws_ja.yaml │ │ │ │ ├── paws_ko.yaml │ │ │ │ ├── paws_zh.yaml │ │ │ │ └── pawsx_template_yaml │ │ │ ├── pile │ │ │ │ ├── README.md │ │ │ │ ├── pile_arxiv.yaml │ │ │ │ ├── pile_bookcorpus2.yaml │ │ │ │ ├── pile_books3.yaml │ │ │ │ ├── pile_dm-mathematics.yaml │ │ │ │ ├── pile_enron.yaml │ │ │ │ ├── pile_europarl.yaml │ │ │ │ ├── pile_freelaw.yaml │ │ │ │ ├── pile_github.yaml │ │ │ │ ├── pile_gutenberg.yaml │ │ │ │ ├── pile_hackernews.yaml │ │ │ │ ├── pile_nih-exporter.yaml │ │ │ │ ├── pile_opensubtitles.yaml │ │ │ │ ├── pile_openwebtext2.yaml │ │ │ │ ├── pile_philpapers.yaml │ │ │ │ ├── pile_pile-cc.yaml │ │ │ │ ├── pile_pubmed-abstracts.yaml │ │ │ │ ├── pile_pubmed-central.yaml │ │ │ │ ├── pile_stackexchange.yaml │ │ │ │ ├── pile_ubuntu-irc.yaml │ │ │ │ ├── pile_uspto.yaml │ │ │ │ ├── pile_wikipedia.yaml │ │ │ │ └── pile_youtubesubtitles.yaml │ │ │ ├── pile_10k │ │ │ │ ├── README.md │ │ │ │ └── pile_10k.yaml │ │ │ ├── piqa │ │ │ │ ├── README.md │ │ │ │ └── piqa.yaml │ │ │ ├── polemo2 │ │ │ │ ├── README.md │ │ │ │ ├── polemo2_in.yaml │ │ │ │ └── polemo2_out.yaml │ │ │ ├── portuguese_bench │ │ │ │ ├── README.md │ │ │ │ ├── assin_entailment.yaml │ │ │ │ ├── assin_paraphrase.yaml │ │ │ │ ├── flores_pt │ │ │ │ │ ├── _flores_common_yaml │ │ │ │ │ ├── create_yamls_flores_pt.py │ │ │ │ │ ├── flores_ca-pt.yaml │ │ │ │ │ ├── flores_de-pt.yaml │ │ │ │ │ ├── flores_en-pt.yaml │ │ │ │ │ ├── flores_es-pt.yaml │ │ │ │ │ ├── flores_eu-pt.yaml │ │ │ │ │ ├── flores_fr-pt.yaml │ │ │ │ │ ├── flores_gl-pt.yaml │ │ │ │ │ ├── flores_it-pt.yaml │ │ │ │ │ ├── flores_pt-ca.yaml │ │ │ │ │ ├── flores_pt-de.yaml │ │ │ │ │ ├── flores_pt-en.yaml │ │ │ │ │ ├── flores_pt-es.yaml │ │ │ │ │ ├── flores_pt-eu.yaml │ │ │ │ │ ├── flores_pt-fr.yaml │ │ │ │ │ ├── flores_pt-gl.yaml │ │ │ │ │ ├── flores_pt-it.yaml │ │ │ │ │ └── flores_pt.yaml │ │ │ │ └── portuguese_bench.yaml │ │ │ ├── prost │ │ │ │ ├── README.md │ │ │ │ └── corypaik_prost.yaml │ │ │ ├── pubmedqa │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_pubmedqa.py │ │ │ │ └── pubmedqa.yaml │ │ │ ├── qa4mre │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_qa4mre.py │ │ │ │ ├── qa4mre_2011.yaml │ │ │ │ ├── qa4mre_2012.yaml │ │ │ │ └── qa4mre_2013.yaml │ │ │ ├── qasper │ │ │ │ ├── README.md │ │ │ │ ├── bool.yaml │ │ │ │ ├── freeform.yaml │ │ │ │ ├── metrics.py │ │ │ │ └── utils.py │ │ │ ├── race │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_race.py │ │ │ │ └── race.yaml │ │ │ ├── realtoxicityprompts │ │ │ │ ├── metric.py │ │ │ │ └── realtoxicityprompts.yaml │ │ │ ├── sciq │ │ │ │ ├── README.md │ │ │ │ └── sciq.yaml │ │ │ ├── scrolls │ │ │ │ ├── README.md │ │ │ │ ├── scrolls_contractnli.yaml │ │ │ │ ├── scrolls_govreport.yaml │ │ │ │ ├── scrolls_narrativeqa.yaml │ │ │ │ ├── scrolls_qasper.yaml │ │ │ │ ├── scrolls_qmsum.yaml │ │ │ │ ├── scrolls_quality.yaml │ │ │ │ ├── scrolls_summscreenfd.yaml │ │ │ │ └── task.py │ │ │ ├── siqa │ │ │ │ ├── README.md │ │ │ │ └── siqa.yaml │ │ │ ├── spanish_bench │ │ │ │ ├── README.md │ │ │ │ ├── flores_es │ │ │ │ │ ├── _flores_common_yaml │ │ │ │ │ ├── create_yamls_flores_es.py │ │ │ │ │ ├── flores_ca-es.yaml │ │ │ │ │ ├── flores_de-es.yaml │ │ │ │ │ ├── flores_en-es.yaml │ │ │ │ │ ├── flores_es-ca.yaml │ │ │ │ │ ├── flores_es-de.yaml │ │ │ │ │ ├── flores_es-en.yaml │ │ │ │ │ ├── flores_es-eu.yaml │ │ │ │ │ ├── flores_es-fr.yaml │ │ │ │ │ ├── flores_es-gl.yaml │ │ │ │ │ ├── flores_es-it.yaml │ │ │ │ │ ├── flores_es-pt.yaml │ │ │ │ │ ├── flores_es.yaml │ │ │ │ │ ├── flores_eu-es.yaml │ │ │ │ │ ├── flores_fr-es.yaml │ │ │ │ │ ├── flores_gl-es.yaml │ │ │ │ │ ├── flores_it-es.yaml │ │ │ │ │ └── flores_pt-es.yaml │ │ │ │ ├── mgsm_direct_es_spanish_bench.yaml │ │ │ │ ├── mgsm_direct_es_v2.yaml │ │ │ │ ├── paws_es.yaml │ │ │ │ ├── paws_es_spanish_bench.yaml │ │ │ │ ├── phrases_es │ │ │ │ │ ├── _phrases_es_common.yaml │ │ │ │ │ ├── phrases_es-va.yaml │ │ │ │ │ └── phrases_va-es.yaml │ │ │ │ ├── spanish_bench.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── wnli_es.yaml │ │ │ │ ├── xlsum_es.yaml │ │ │ │ ├── xnli_es.yaml │ │ │ │ ├── xnli_es_spanish_bench.yaml │ │ │ │ └── xquad_es.yaml │ │ │ ├── squad_completion │ │ │ │ ├── README.md │ │ │ │ ├── squad_completion.yaml │ │ │ │ └── task.py │ │ │ ├── squadv2 │ │ │ │ ├── README.md │ │ │ │ ├── squadv2.yaml │ │ │ │ └── task.py │ │ │ ├── storycloze │ │ │ │ ├── README.md │ │ │ │ ├── storycloze_2016.yaml │ │ │ │ └── storycloze_2018.yaml │ │ │ ├── super_glue │ │ │ │ ├── README.md │ │ │ │ ├── boolq │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── seq2seq.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ ├── cb │ │ │ │ │ ├── aggregate.py │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ │ ├── copa │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── utils.py │ │ │ │ ├── multirc │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ │ ├── record │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ ├── t5_utils.py │ │ │ │ │ └── util.py │ │ │ │ ├── rte │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ ├── wic │ │ │ │ │ ├── default.yaml │ │ │ │ │ └── t5-prompt.yaml │ │ │ │ └── wsc │ │ │ │ │ ├── default.yaml │ │ │ │ │ ├── preprocess_wsc.py │ │ │ │ │ ├── t5-prompt.yaml │ │ │ │ │ └── t5_utils.py │ │ │ ├── swag │ │ │ │ ├── README.md │ │ │ │ └── swag.yaml │ │ │ ├── swde │ │ │ │ ├── README.md │ │ │ │ ├── swde.yaml │ │ │ │ └── task.py │ │ │ ├── tinyBenchmarks │ │ │ │ ├── README.md │ │ │ │ ├── agg_functions.py │ │ │ │ ├── tinyArc.yaml │ │ │ │ ├── tinyBenchmarks.yaml │ │ │ │ ├── tinyGSM8k.yaml │ │ │ │ ├── tinyHellaswag.yaml │ │ │ │ ├── tinyMMLU.yaml │ │ │ │ ├── tinyTruthfulQA_mc1.yaml │ │ │ │ ├── tinyTruthfulQA_mc2.yaml │ │ │ │ ├── tinyWinogrande.yaml │ │ │ │ ├── utils_hellaswag.py │ │ │ │ ├── utils_truthfulqa.py │ │ │ │ └── utils_winogrande.py │ │ │ ├── tmlu │ │ │ │ ├── README.md │ │ │ │ ├── default │ │ │ │ │ ├── _default_template_yaml │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _tmlu.yaml │ │ │ │ │ ├── tmlu_AST_biology.yaml │ │ │ │ │ ├── tmlu_AST_chemistry.yaml │ │ │ │ │ ├── tmlu_AST_chinese.yaml │ │ │ │ │ ├── tmlu_AST_civics.yaml │ │ │ │ │ ├── tmlu_AST_geography.yaml │ │ │ │ │ ├── tmlu_AST_history.yaml │ │ │ │ │ ├── tmlu_CAP_biology.yaml │ │ │ │ │ ├── tmlu_CAP_chemistry.yaml │ │ │ │ │ ├── tmlu_CAP_chinese.yaml │ │ │ │ │ ├── tmlu_CAP_civics.yaml │ │ │ │ │ ├── tmlu_CAP_earth_science.yaml │ │ │ │ │ ├── tmlu_CAP_geography.yaml │ │ │ │ │ ├── tmlu_CAP_history.yaml │ │ │ │ │ ├── tmlu_GSAT_biology.yaml │ │ │ │ │ ├── tmlu_GSAT_chemistry.yaml │ │ │ │ │ ├── tmlu_GSAT_chinese.yaml │ │ │ │ │ ├── tmlu_GSAT_civics.yaml │ │ │ │ │ ├── tmlu_GSAT_earth_science.yaml │ │ │ │ │ ├── tmlu_GSAT_geography.yaml │ │ │ │ │ ├── tmlu_GSAT_history.yaml │ │ │ │ │ ├── tmlu_accountant.yaml │ │ │ │ │ ├── tmlu_basic_traditional_chinese_medicine.yaml │ │ │ │ │ ├── tmlu_clinical_psychologist.yaml │ │ │ │ │ ├── tmlu_clinical_traditional_chinese_medicine.yaml │ │ │ │ │ ├── tmlu_driving_rule.yaml │ │ │ │ │ ├── tmlu_lawyer_qualification.yaml │ │ │ │ │ ├── tmlu_nutritionist.yaml │ │ │ │ │ ├── tmlu_taiwan_tourist_resources.yaml │ │ │ │ │ ├── tmlu_teacher_qualification.yaml │ │ │ │ │ ├── tmlu_tour_guide.yaml │ │ │ │ │ ├── tmlu_tour_leader.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── subject.tsv │ │ │ ├── tmmluplus │ │ │ │ ├── README.md │ │ │ │ ├── default │ │ │ │ │ ├── _generate_configs.py │ │ │ │ │ ├── _tmmluplus.yaml │ │ │ │ │ ├── _tmmluplus_STEM.yaml │ │ │ │ │ ├── _tmmluplus_humanities.yaml │ │ │ │ │ ├── _tmmluplus_other.yaml │ │ │ │ │ ├── _tmmluplus_social_sciences.yaml │ │ │ │ │ ├── _tmmluplus_template_yaml │ │ │ │ │ ├── tmmluplus_accounting.yaml │ │ │ │ │ ├── tmmluplus_administrative_law.yaml │ │ │ │ │ ├── tmmluplus_advance_chemistry.yaml │ │ │ │ │ ├── tmmluplus_agriculture.yaml │ │ │ │ │ ├── tmmluplus_anti_money_laundering.yaml │ │ │ │ │ ├── tmmluplus_auditing.yaml │ │ │ │ │ ├── tmmluplus_basic_medical_science.yaml │ │ │ │ │ ├── tmmluplus_business_management.yaml │ │ │ │ │ ├── tmmluplus_chinese_language_and_literature.yaml │ │ │ │ │ ├── tmmluplus_clinical_psychology.yaml │ │ │ │ │ ├── tmmluplus_computer_science.yaml │ │ │ │ │ ├── tmmluplus_culinary_skills.yaml │ │ │ │ │ ├── tmmluplus_dentistry.yaml │ │ │ │ │ ├── tmmluplus_economics.yaml │ │ │ │ │ ├── tmmluplus_education.yaml │ │ │ │ │ ├── tmmluplus_education_(profession_level).yaml │ │ │ │ │ ├── tmmluplus_educational_psychology.yaml │ │ │ │ │ ├── tmmluplus_engineering_math.yaml │ │ │ │ │ ├── tmmluplus_finance_banking.yaml │ │ │ │ │ ├── tmmluplus_financial_analysis.yaml │ │ │ │ │ ├── tmmluplus_fire_science.yaml │ │ │ │ │ ├── tmmluplus_general_principles_of_law.yaml │ │ │ │ │ ├── tmmluplus_geography_of_taiwan.yaml │ │ │ │ │ ├── tmmluplus_human_behavior.yaml │ │ │ │ │ ├── tmmluplus_insurance_studies.yaml │ │ │ │ │ ├── tmmluplus_introduction_to_law.yaml │ │ │ │ │ ├── tmmluplus_jce_humanities.yaml │ │ │ │ │ ├── tmmluplus_junior_chemistry.yaml │ │ │ │ │ ├── tmmluplus_junior_chinese_exam.yaml │ │ │ │ │ ├── tmmluplus_junior_math_exam.yaml │ │ │ │ │ ├── tmmluplus_junior_science_exam.yaml │ │ │ │ │ ├── tmmluplus_junior_social_studies.yaml │ │ │ │ │ ├── tmmluplus_linear_algebra.yaml │ │ │ │ │ ├── tmmluplus_logic_reasoning.yaml │ │ │ │ │ ├── tmmluplus_macroeconomics.yaml │ │ │ │ │ ├── tmmluplus_management_accounting.yaml │ │ │ │ │ ├── tmmluplus_marketing_management.yaml │ │ │ │ │ ├── tmmluplus_mechanical.yaml │ │ │ │ │ ├── tmmluplus_music.yaml │ │ │ │ │ ├── tmmluplus_national_protection.yaml │ │ │ │ │ ├── tmmluplus_nautical_science.yaml │ │ │ │ │ ├── tmmluplus_occupational_therapy_for_psychological_disorders.yaml │ │ │ │ │ ├── tmmluplus_official_document_management.yaml │ │ │ │ │ ├── tmmluplus_optometry.yaml │ │ │ │ │ ├── tmmluplus_organic_chemistry.yaml │ │ │ │ │ ├── tmmluplus_pharmacology.yaml │ │ │ │ │ ├── tmmluplus_pharmacy.yaml │ │ │ │ │ ├── tmmluplus_physical_education.yaml │ │ │ │ │ ├── tmmluplus_physics.yaml │ │ │ │ │ ├── tmmluplus_politic_science.yaml │ │ │ │ │ ├── tmmluplus_real_estate.yaml │ │ │ │ │ ├── tmmluplus_secondary_physics.yaml │ │ │ │ │ ├── tmmluplus_statistics_and_machine_learning.yaml │ │ │ │ │ ├── tmmluplus_taiwanese_hokkien.yaml │ │ │ │ │ ├── tmmluplus_taxation.yaml │ │ │ │ │ ├── tmmluplus_technical.yaml │ │ │ │ │ ├── tmmluplus_three_principles_of_people.yaml │ │ │ │ │ ├── tmmluplus_trade.yaml │ │ │ │ │ ├── tmmluplus_traditional_chinese_medicine_clinical_medicine.yaml │ │ │ │ │ ├── tmmluplus_trust_practice.yaml │ │ │ │ │ ├── tmmluplus_ttqav2.yaml │ │ │ │ │ ├── tmmluplus_tve_chinese_language.yaml │ │ │ │ │ ├── tmmluplus_tve_design.yaml │ │ │ │ │ ├── tmmluplus_tve_mathematics.yaml │ │ │ │ │ ├── tmmluplus_tve_natural_sciences.yaml │ │ │ │ │ ├── tmmluplus_veterinary_pathology.yaml │ │ │ │ │ ├── tmmluplus_veterinary_pharmacology.yaml │ │ │ │ │ └── utils.py │ │ │ │ └── subject.tsv │ │ │ ├── toxigen │ │ │ │ ├── README.md │ │ │ │ ├── toxigen.yaml │ │ │ │ └── utils.py │ │ │ ├── translation │ │ │ │ ├── README.md │ │ │ │ ├── iwslt2017_ar-en.yaml │ │ │ │ ├── iwslt2017_en-ar.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── wmt14_en-fr.yaml │ │ │ │ ├── wmt14_fr-en.yaml │ │ │ │ ├── wmt16_de-en.yaml │ │ │ │ ├── wmt16_en-de.yaml │ │ │ │ ├── wmt16_en-ro.yaml │ │ │ │ ├── wmt16_ro-en.yaml │ │ │ │ └── wmt_common_yaml │ │ │ ├── triviaqa │ │ │ │ ├── README.md │ │ │ │ └── default.yaml │ │ │ ├── truthfulqa │ │ │ │ ├── README.md │ │ │ │ ├── truthfulqa_gen.yaml │ │ │ │ ├── truthfulqa_mc1.yaml │ │ │ │ ├── truthfulqa_mc2.yaml │ │ │ │ └── utils.py │ │ │ ├── turkishmmlu │ │ │ │ ├── README.md │ │ │ │ ├── config │ │ │ │ │ ├── Biology.yaml │ │ │ │ │ ├── Chemistry.yaml │ │ │ │ │ ├── Geography.yaml │ │ │ │ │ ├── History.yaml │ │ │ │ │ ├── Mathematics.yaml │ │ │ │ │ ├── Philosophy.yaml │ │ │ │ │ ├── Physics.yaml │ │ │ │ │ ├── Religion_and_Ethics.yaml │ │ │ │ │ ├── Turkish_Language_and_Literature.yaml │ │ │ │ │ └── _turkishmmlu_default_yaml │ │ │ │ └── config_cot │ │ │ │ │ ├── Biology.yaml │ │ │ │ │ ├── Chemistry.yaml │ │ │ │ │ ├── Geography.yaml │ │ │ │ │ ├── History.yaml │ │ │ │ │ ├── Mathematics.yaml │ │ │ │ │ ├── Philosophy.yaml │ │ │ │ │ ├── Physics.yaml │ │ │ │ │ ├── Religion_and_Ethics.yaml │ │ │ │ │ ├── Turkish_Language_and_Literature.yaml │ │ │ │ │ └── _turkishmmlu_cot_default_yaml │ │ │ ├── unitxt │ │ │ │ ├── 20_newsgroups.yaml │ │ │ │ ├── README.md │ │ │ │ ├── ag_news.yaml │ │ │ │ ├── argument_topic.yaml │ │ │ │ ├── atis.yaml │ │ │ │ ├── banking77.yaml │ │ │ │ ├── claim_stance_topic.yaml │ │ │ │ ├── cnn_dailymail.yaml │ │ │ │ ├── coedit_gec.yaml │ │ │ │ ├── dbpedia_14.yaml │ │ │ │ ├── ethos_binary.yaml │ │ │ │ ├── financial_tweets.yaml │ │ │ │ ├── law_stack_exchange.yaml │ │ │ │ ├── ledgar.yaml │ │ │ │ ├── medical_abstracts.yaml │ │ │ │ ├── stsb.yaml │ │ │ │ ├── task.py │ │ │ │ ├── unfair_tos.yaml │ │ │ │ ├── unitxt │ │ │ │ ├── xsum.yaml │ │ │ │ └── yahoo_answers_topics.yaml │ │ │ ├── unscramble │ │ │ │ ├── README.md │ │ │ │ ├── anagrams1.yaml │ │ │ │ ├── anagrams2.yaml │ │ │ │ ├── cycle_letters.yaml │ │ │ │ ├── random_insertion.yaml │ │ │ │ └── reversed_words.yaml │ │ │ ├── webqs │ │ │ │ ├── README.md │ │ │ │ ├── utils.py │ │ │ │ └── webqs.yaml │ │ │ ├── wikitext │ │ │ │ ├── README.md │ │ │ │ ├── preprocess_wikitext.py │ │ │ │ └── wikitext.yaml │ │ │ ├── winogrande │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── preprocess_winogrande.py │ │ │ ├── wmdp │ │ │ │ ├── README.md │ │ │ │ ├── _default_template_yaml │ │ │ │ ├── _wmdp.yaml │ │ │ │ ├── wmdp_bio.yaml │ │ │ │ ├── wmdp_chem.yaml │ │ │ │ └── wmdp_cyber.yaml │ │ │ ├── wmt2016 │ │ │ │ ├── README.md │ │ │ │ ├── metrics.py │ │ │ │ └── ro_en-t5_prompt.yaml │ │ │ ├── wsc273 │ │ │ │ ├── README.md │ │ │ │ ├── default.yaml │ │ │ │ └── utils.py │ │ │ ├── xcopa │ │ │ │ ├── README.md │ │ │ │ ├── _xcopa.yaml │ │ │ │ ├── default_et.yaml │ │ │ │ ├── default_ht.yaml │ │ │ │ ├── default_id.yaml │ │ │ │ ├── default_it.yaml │ │ │ │ ├── default_qu.yaml │ │ │ │ ├── default_sw.yaml │ │ │ │ ├── default_ta.yaml │ │ │ │ ├── default_th.yaml │ │ │ │ ├── default_tr.yaml │ │ │ │ ├── default_vi.yaml │ │ │ │ ├── default_zh.yaml │ │ │ │ └── utils.py │ │ │ ├── xnli │ │ │ │ ├── README.md │ │ │ │ ├── _xnli.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── xnli_ar.yaml │ │ │ │ ├── xnli_bg.yaml │ │ │ │ ├── xnli_common_yaml │ │ │ │ ├── xnli_de.yaml │ │ │ │ ├── xnli_el.yaml │ │ │ │ ├── xnli_en.yaml │ │ │ │ ├── xnli_es.yaml │ │ │ │ ├── xnli_fr.yaml │ │ │ │ ├── xnli_hi.yaml │ │ │ │ ├── xnli_ru.yaml │ │ │ │ ├── xnli_sw.yaml │ │ │ │ ├── xnli_th.yaml │ │ │ │ ├── xnli_tr.yaml │ │ │ │ ├── xnli_ur.yaml │ │ │ │ ├── xnli_vi.yaml │ │ │ │ └── xnli_zh.yaml │ │ │ ├── xnli_eu │ │ │ │ ├── README.md │ │ │ │ ├── xnli_common_yaml │ │ │ │ ├── xnli_eu.yaml │ │ │ │ ├── xnli_eu_mt.yaml │ │ │ │ └── xnli_eu_native.yaml │ │ │ ├── xstorycloze │ │ │ │ ├── README.md │ │ │ │ ├── _xstorycloze.yaml │ │ │ │ ├── default_ar.yaml │ │ │ │ ├── default_en.yaml │ │ │ │ ├── default_es.yaml │ │ │ │ ├── default_eu.yaml │ │ │ │ ├── default_hi.yaml │ │ │ │ ├── default_id.yaml │ │ │ │ ├── default_my.yaml │ │ │ │ ├── default_ru.yaml │ │ │ │ ├── default_sw.yaml │ │ │ │ ├── default_te.yaml │ │ │ │ └── default_zh.yaml │ │ │ └── xwinograd │ │ │ │ ├── README.md │ │ │ │ ├── _xwinograd.yaml │ │ │ │ ├── utils.py │ │ │ │ ├── xwinograd_common_yaml │ │ │ │ ├── xwinograd_en.yaml │ │ │ │ ├── xwinograd_fr.yaml │ │ │ │ ├── xwinograd_jp.yaml │ │ │ │ ├── xwinograd_pt.yaml │ │ │ │ ├── xwinograd_ru.yaml │ │ │ │ └── xwinograd_zh.yaml │ │ └── utils.py │ ├── mypy.ini │ ├── pyproject.toml │ ├── requirements.txt │ ├── scripts │ │ ├── __init__.py │ │ ├── build_benchmark.py │ │ ├── clean_training_data │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── compress_and_package.py │ │ │ ├── generate_13_grams.py │ │ │ ├── investigate_pile.py │ │ │ ├── janitor_util.cpp │ │ │ ├── process_sorted_buckets.py │ │ │ └── sort_13_gram_buckets.py │ │ ├── cost_estimate.py │ │ ├── get_prompts.py │ │ ├── make_gpt2_test_cases.py │ │ ├── make_table_results.py │ │ ├── make_table_tasks.py │ │ ├── model_comparator.py │ │ ├── regression.py │ │ ├── requests_caching.py │ │ ├── write_out.py │ │ └── zeno_visualize.py │ ├── setup.py │ ├── templates │ │ └── new_yaml_task │ │ │ ├── README.md │ │ │ └── blank_yaml.yaml │ └── tests │ │ ├── __init__.py │ │ ├── models │ │ ├── test_api.py │ │ ├── test_gguf.py │ │ ├── test_huggingface.py │ │ ├── test_neuralmagic.py │ │ ├── test_openvino.py │ │ └── test_vllm.py │ │ ├── test_cli.py │ │ ├── test_evaluator.py │ │ ├── test_include_path.py │ │ ├── test_janitor.py │ │ ├── test_misc.py │ │ ├── test_prompt.py │ │ ├── test_requests_caching.py │ │ ├── test_task_manager.py │ │ ├── test_tasks.py │ │ ├── test_utils.py │ │ ├── testconfigs │ │ └── arc_test.yaml │ │ ├── testdata │ │ ├── ai2_arc_10_hf_pretrained-EleutherAI-pythia-14m-dtype-float32-device-cpu.txt │ │ ├── anagrams1-v0-greedy_until │ │ ├── anagrams2-v0-greedy_until │ │ ├── anli_r1-v0-loglikelihood │ │ ├── anli_r2-v0-loglikelihood │ │ ├── anli_r3-v0-loglikelihood │ │ ├── arc_challenge-v0-loglikelihood │ │ ├── arc_challenge-v2.0-loglikelihood │ │ ├── arc_easy-v0-loglikelihood │ │ ├── arithmetic_1dc-v0-loglikelihood │ │ ├── arithmetic_2da-v0-loglikelihood │ │ ├── arithmetic_2dm-v0-loglikelihood │ │ ├── arithmetic_2ds-v0-loglikelihood │ │ ├── arithmetic_3da-v0-loglikelihood │ │ ├── arithmetic_3ds-v0-loglikelihood │ │ ├── arithmetic_4da-v0-loglikelihood │ │ ├── arithmetic_4ds-v0-loglikelihood │ │ ├── arithmetic_5da-v0-loglikelihood │ │ ├── arithmetic_5ds-v0-loglikelihood │ │ ├── blimp_adjunct_island-v0-loglikelihood │ │ ├── blimp_anaphor_gender_agreement-v0-loglikelihood │ │ ├── blimp_anaphor_number_agreement-v0-loglikelihood │ │ ├── blimp_animate_subject_passive-v0-loglikelihood │ │ ├── blimp_animate_subject_trans-v0-loglikelihood │ │ ├── blimp_causative-v0-loglikelihood │ │ ├── blimp_complex_NP_island-v0-loglikelihood │ │ ├── blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood │ │ ├── blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood │ │ ├── blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood │ │ ├── blimp_distractor_agreement_relational_noun-v0-loglikelihood │ │ ├── blimp_distractor_agreement_relative_clause-v0-loglikelihood │ │ ├── blimp_drop_argument-v0-loglikelihood │ │ ├── blimp_ellipsis_n_bar_1-v0-loglikelihood │ │ ├── blimp_ellipsis_n_bar_2-v0-loglikelihood │ │ ├── blimp_existential_there_object_raising-v0-loglikelihood │ │ ├── blimp_existential_there_quantifiers_1-v0-loglikelihood │ │ ├── blimp_existential_there_quantifiers_2-v0-loglikelihood │ │ ├── blimp_existential_there_subject_raising-v0-loglikelihood │ │ ├── blimp_expletive_it_object_raising-v0-loglikelihood │ │ ├── blimp_inchoative-v0-loglikelihood │ │ ├── blimp_intransitive-v0-loglikelihood │ │ ├── blimp_irregular_past_participle_adjectives-v0-loglikelihood │ │ ├── blimp_irregular_past_participle_verbs-v0-loglikelihood │ │ ├── blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ ├── blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ ├── blimp_left_branch_island_echo_question-v0-loglikelihood │ │ ├── blimp_left_branch_island_simple_question-v0-loglikelihood │ │ ├── blimp_matrix_question_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_npi_present_1-v0-loglikelihood │ │ ├── blimp_npi_present_2-v0-loglikelihood │ │ ├── blimp_only_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_only_npi_scope-v0-loglikelihood │ │ ├── blimp_passive_1-v0-loglikelihood │ │ ├── blimp_passive_2-v0-loglikelihood │ │ ├── blimp_principle_A_c_command-v0-loglikelihood │ │ ├── blimp_principle_A_case_1-v0-loglikelihood │ │ ├── blimp_principle_A_case_2-v0-loglikelihood │ │ ├── blimp_principle_A_domain_1-v0-loglikelihood │ │ ├── blimp_principle_A_domain_2-v0-loglikelihood │ │ ├── blimp_principle_A_domain_3-v0-loglikelihood │ │ ├── blimp_principle_A_reconstruction-v0-loglikelihood │ │ ├── blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood │ │ ├── blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood │ │ ├── blimp_sentential_negation_npi_licensor_present-v0-loglikelihood │ │ ├── blimp_sentential_negation_npi_scope-v0-loglikelihood │ │ ├── blimp_sentential_subject_island-v0-loglikelihood │ │ ├── blimp_superlative_quantifiers_1-v0-loglikelihood │ │ ├── blimp_superlative_quantifiers_2-v0-loglikelihood │ │ ├── blimp_tough_vs_raising_1-v0-loglikelihood │ │ ├── blimp_tough_vs_raising_2-v0-loglikelihood │ │ ├── blimp_transitive-v0-loglikelihood │ │ ├── blimp_wh_island-v0-loglikelihood │ │ ├── blimp_wh_questions_object_gap-v0-loglikelihood │ │ ├── blimp_wh_questions_subject_gap-v0-loglikelihood │ │ ├── blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood │ │ ├── blimp_wh_vs_that_no_gap-v0-loglikelihood │ │ ├── blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood │ │ ├── blimp_wh_vs_that_with_gap-v0-loglikelihood │ │ ├── blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood │ │ ├── boolq-v0-loglikelihood │ │ ├── boolq-v1-loglikelihood │ │ ├── cb-v0-loglikelihood │ │ ├── cb-v1-loglikelihood │ │ ├── cola-v0-loglikelihood │ │ ├── copa-v0-loglikelihood │ │ ├── coqa-v0-greedy_until │ │ ├── coqa-v1-greedy_until │ │ ├── crows_pairs_english-v0-loglikelihood │ │ ├── crows_pairs_english_age-v0-loglikelihood │ │ ├── crows_pairs_english_autre-v0-loglikelihood │ │ ├── crows_pairs_english_disability-v0-loglikelihood │ │ ├── crows_pairs_english_gender-v0-loglikelihood │ │ ├── crows_pairs_english_nationality-v0-loglikelihood │ │ ├── crows_pairs_english_physical_appearance-v0-loglikelihood │ │ ├── crows_pairs_english_race_color-v0-loglikelihood │ │ ├── crows_pairs_english_religion-v0-loglikelihood │ │ ├── crows_pairs_english_sexual_orientation-v0-loglikelihood │ │ ├── crows_pairs_english_socioeconomic-v0-loglikelihood │ │ ├── crows_pairs_french-v0-loglikelihood │ │ ├── crows_pairs_french_age-v0-loglikelihood │ │ ├── crows_pairs_french_autre-v0-loglikelihood │ │ ├── crows_pairs_french_disability-v0-loglikelihood │ │ ├── crows_pairs_french_gender-v0-loglikelihood │ │ ├── crows_pairs_french_nationality-v0-loglikelihood │ │ ├── crows_pairs_french_physical_appearance-v0-loglikelihood │ │ ├── crows_pairs_french_race_color-v0-loglikelihood │ │ ├── crows_pairs_french_religion-v0-loglikelihood │ │ ├── crows_pairs_french_sexual_orientation-v0-loglikelihood │ │ ├── crows_pairs_french_socioeconomic-v0-loglikelihood │ │ ├── cycle_letters-v0-greedy_until │ │ ├── drop-v0-greedy_until │ │ ├── drop-v1-greedy_until │ │ ├── ethics_cm-v0-loglikelihood │ │ ├── ethics_deontology-v0-loglikelihood │ │ ├── ethics_justice-v0-loglikelihood │ │ ├── ethics_utilitarianism-v0-loglikelihood │ │ ├── ethics_utilitarianism_original-v0-loglikelihood │ │ ├── ethics_virtue-v0-loglikelihood │ │ ├── gguf_test_44e268d15decc4d2d0f99e57e1476269826cd3b54262f7a0981f75ddd45b25d0.pkl │ │ ├── gguf_test_52ea409606de8755e03cf7c79f824101a4ce64bb6e6d3df556b8a4e7a5d92418.pkl │ │ ├── gguf_test_8fcf3f2f52afeb2acd7c8e02c2cc3ce31a691b665d295f6c4e4bbd71c7caa1a2.pkl │ │ ├── gpt3_test_0deb8e9bde8e8327bbc48157f638ff3ba06b0cd816dad2beb8ad90f7fbe795c7.pkl │ │ ├── gpt3_test_8025023377febbd8c5f2b9f26705c394ff375d0cad7c89c10fd9b8e1eb66ff1c.pkl │ │ ├── gpt3_test_bb2cc49115e88788ed870ad0716eb00b280a885f91c7ed6e1e864435e5e2b6ac.pkl │ │ ├── gpt3_test_cfd11f555a5a63b6dfa114a55a932e51b724cdd44d4842586b9ce37260bf7aaa.pkl │ │ ├── gpt3_test_f307d52964c295e2005c5e782b688c24388e0cecadf29f1e6fc7f394236ea9c0.pkl │ │ ├── gsm8k-v0-greedy_until │ │ ├── headqa-v0-loglikelihood │ │ ├── headqa_en-v0-loglikelihood │ │ ├── headqa_es-v0-loglikelihood │ │ ├── hellaswag-v0-loglikelihood │ │ ├── hendrycksTest-abstract_algebra-v0-loglikelihood │ │ ├── hendrycksTest-anatomy-v0-loglikelihood │ │ ├── hendrycksTest-astronomy-v0-loglikelihood │ │ ├── hendrycksTest-business_ethics-v0-loglikelihood │ │ ├── hendrycksTest-clinical_knowledge-v0-loglikelihood │ │ ├── hendrycksTest-college_biology-v0-loglikelihood │ │ ├── hendrycksTest-college_chemistry-v0-loglikelihood │ │ ├── hendrycksTest-college_computer_science-v0-loglikelihood │ │ ├── hendrycksTest-college_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-college_medicine-v0-loglikelihood │ │ ├── hendrycksTest-college_physics-v0-loglikelihood │ │ ├── hendrycksTest-computer_security-v0-loglikelihood │ │ ├── hendrycksTest-conceptual_physics-v0-loglikelihood │ │ ├── hendrycksTest-econometrics-v0-loglikelihood │ │ ├── hendrycksTest-electrical_engineering-v0-loglikelihood │ │ ├── hendrycksTest-elementary_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-formal_logic-v0-loglikelihood │ │ ├── hendrycksTest-global_facts-v0-loglikelihood │ │ ├── hendrycksTest-high_school_biology-v0-loglikelihood │ │ ├── hendrycksTest-high_school_chemistry-v0-loglikelihood │ │ ├── hendrycksTest-high_school_computer_science-v0-loglikelihood │ │ ├── hendrycksTest-high_school_european_history-v0-loglikelihood │ │ ├── hendrycksTest-high_school_geography-v0-loglikelihood │ │ ├── hendrycksTest-high_school_government_and_politics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_macroeconomics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_mathematics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_microeconomics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_physics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_psychology-v0-loglikelihood │ │ ├── hendrycksTest-high_school_statistics-v0-loglikelihood │ │ ├── hendrycksTest-high_school_us_history-v0-loglikelihood │ │ ├── hendrycksTest-high_school_world_history-v0-loglikelihood │ │ ├── hendrycksTest-human_aging-v0-loglikelihood │ │ ├── hendrycksTest-human_sexuality-v0-loglikelihood │ │ ├── hendrycksTest-international_law-v0-loglikelihood │ │ ├── hendrycksTest-jurisprudence-v0-loglikelihood │ │ ├── hendrycksTest-logical_fallacies-v0-loglikelihood │ │ ├── hendrycksTest-machine_learning-v0-loglikelihood │ │ ├── hendrycksTest-management-v0-loglikelihood │ │ ├── hendrycksTest-marketing-v0-loglikelihood │ │ ├── hendrycksTest-medical_genetics-v0-loglikelihood │ │ ├── hendrycksTest-miscellaneous-v0-loglikelihood │ │ ├── hendrycksTest-moral_disputes-v0-loglikelihood │ │ ├── hendrycksTest-moral_scenarios-v0-loglikelihood │ │ ├── hendrycksTest-nutrition-v0-loglikelihood │ │ ├── hendrycksTest-philosophy-v0-loglikelihood │ │ ├── hendrycksTest-prehistory-v0-loglikelihood │ │ ├── hendrycksTest-professional_accounting-v0-loglikelihood │ │ ├── hendrycksTest-professional_law-v0-loglikelihood │ │ ├── hendrycksTest-professional_medicine-v0-loglikelihood │ │ ├── hendrycksTest-professional_psychology-v0-loglikelihood │ │ ├── hendrycksTest-public_relations-v0-loglikelihood │ │ ├── hendrycksTest-security_studies-v0-loglikelihood │ │ ├── hendrycksTest-sociology-v0-loglikelihood │ │ ├── hendrycksTest-us_foreign_policy-v0-loglikelihood │ │ ├── hendrycksTest-virology-v0-loglikelihood │ │ ├── hendrycksTest-world_religions-v0-loglikelihood │ │ ├── iwslt17-ar-en-v0-greedy_until │ │ ├── iwslt17-en-ar-v0-greedy_until │ │ ├── lambada-v0-loglikelihood │ │ ├── lambada_cloze-v0-loglikelihood │ │ ├── lambada_mt_de-v0-loglikelihood │ │ ├── lambada_mt_en-v0-loglikelihood │ │ ├── lambada_mt_es-v0-loglikelihood │ │ ├── lambada_mt_fr-v0-loglikelihood │ │ ├── lambada_mt_it-v0-loglikelihood │ │ ├── lambada_openai-v0-loglikelihood │ │ ├── lambada_openai-v2.0-loglikelihood │ │ ├── lambada_openai_10_hf_pretrained-EleutherAI-pythia-14m-dtype-float32-device-cpu.txt │ │ ├── lambada_openai_cloze-v0-loglikelihood │ │ ├── lambada_openai_mt_de-v0-loglikelihood │ │ ├── lambada_openai_mt_en-v0-loglikelihood │ │ ├── lambada_openai_mt_es-v0-loglikelihood │ │ ├── lambada_openai_mt_fr-v0-loglikelihood │ │ ├── lambada_openai_mt_it-v0-loglikelihood │ │ ├── lambada_standard-v0-loglikelihood │ │ ├── lambada_standard_cloze-v0-loglikelihood │ │ ├── logiqa-v0-loglikelihood │ │ ├── math_algebra-v0-greedy_until │ │ ├── math_algebra-v1-greedy_until │ │ ├── math_counting_and_prob-v0-greedy_until │ │ ├── math_counting_and_prob-v1-greedy_until │ │ ├── math_geometry-v0-greedy_until │ │ ├── math_geometry-v1-greedy_until │ │ ├── math_intermediate_algebra-v0-greedy_until │ │ ├── math_intermediate_algebra-v1-greedy_until │ │ ├── math_num_theory-v0-greedy_until │ │ ├── math_num_theory-v1-greedy_until │ │ ├── math_prealgebra-v0-greedy_until │ │ ├── math_prealgebra-v1-greedy_until │ │ ├── math_precalc-v0-greedy_until │ │ ├── math_precalc-v1-greedy_until │ │ ├── mathqa-v0-loglikelihood │ │ ├── mc_taco-v0-loglikelihood │ │ ├── mmlu_stem_10_hf_pretrained-EleutherAI-pythia-14m-dtype-float32-device-cpu.txt │ │ ├── mnli-v0-loglikelihood │ │ ├── mnli_mismatched-v0-loglikelihood │ │ ├── mrpc-v0-loglikelihood │ │ ├── multirc-v0-loglikelihood │ │ ├── multirc-v1-loglikelihood │ │ ├── mutual-v0-loglikelihood │ │ ├── mutual-v1-loglikelihood │ │ ├── mutual_plus-v0-loglikelihood │ │ ├── mutual_plus-v1-loglikelihood │ │ ├── openbookqa-v0-loglikelihood │ │ ├── pile_arxiv-v0-loglikelihood_rolling │ │ ├── pile_arxiv-v1-loglikelihood_rolling │ │ ├── pile_bookcorpus2-v0-loglikelihood_rolling │ │ ├── pile_bookcorpus2-v1-loglikelihood_rolling │ │ ├── pile_books3-v0-loglikelihood_rolling │ │ ├── pile_books3-v1-loglikelihood_rolling │ │ ├── pile_dm-mathematics-v0-loglikelihood_rolling │ │ ├── pile_dm-mathematics-v1-loglikelihood_rolling │ │ ├── pile_enron-v0-loglikelihood_rolling │ │ ├── pile_enron-v1-loglikelihood_rolling │ │ ├── pile_europarl-v0-loglikelihood_rolling │ │ ├── pile_europarl-v1-loglikelihood_rolling │ │ ├── pile_freelaw-v0-loglikelihood_rolling │ │ ├── pile_freelaw-v1-loglikelihood_rolling │ │ ├── pile_github-v0-loglikelihood_rolling │ │ ├── pile_github-v1-loglikelihood_rolling │ │ ├── pile_gutenberg-v0-loglikelihood_rolling │ │ ├── pile_gutenberg-v1-loglikelihood_rolling │ │ ├── pile_hackernews-v0-loglikelihood_rolling │ │ ├── pile_hackernews-v1-loglikelihood_rolling │ │ ├── pile_nih-exporter-v0-loglikelihood_rolling │ │ ├── pile_nih-exporter-v1-loglikelihood_rolling │ │ ├── pile_opensubtitles-v0-loglikelihood_rolling │ │ ├── pile_opensubtitles-v1-loglikelihood_rolling │ │ ├── pile_openwebtext2-v0-loglikelihood_rolling │ │ ├── pile_openwebtext2-v1-loglikelihood_rolling │ │ ├── pile_philpapers-v0-loglikelihood_rolling │ │ ├── pile_philpapers-v1-loglikelihood_rolling │ │ ├── pile_pile-cc-v0-loglikelihood_rolling │ │ ├── pile_pile-cc-v1-loglikelihood_rolling │ │ ├── pile_pubmed-abstracts-v0-loglikelihood_rolling │ │ ├── pile_pubmed-abstracts-v1-loglikelihood_rolling │ │ ├── pile_pubmed-central-v0-loglikelihood_rolling │ │ ├── pile_pubmed-central-v1-loglikelihood_rolling │ │ ├── pile_stackexchange-v0-loglikelihood_rolling │ │ ├── pile_stackexchange-v1-loglikelihood_rolling │ │ ├── pile_ubuntu-irc-v0-loglikelihood_rolling │ │ ├── pile_ubuntu-irc-v1-loglikelihood_rolling │ │ ├── pile_uspto-v0-loglikelihood_rolling │ │ ├── pile_uspto-v1-loglikelihood_rolling │ │ ├── pile_wikipedia-v0-loglikelihood_rolling │ │ ├── pile_wikipedia-v1-loglikelihood_rolling │ │ ├── pile_youtubesubtitles-v0-loglikelihood_rolling │ │ ├── pile_youtubesubtitles-v1-loglikelihood_rolling │ │ ├── piqa-v0-loglikelihood │ │ ├── prost-v0-loglikelihood │ │ ├── pubmedqa-v0-loglikelihood │ │ ├── qa4mre_2011-v0-loglikelihood │ │ ├── qa4mre_2012-v0-loglikelihood │ │ ├── qa4mre_2013-v0-loglikelihood │ │ ├── qnli-v0-loglikelihood │ │ ├── qqp-v0-loglikelihood │ │ ├── race-v0-loglikelihood │ │ ├── random_insertion-v0-greedy_until │ │ ├── record-v0-loglikelihood │ │ ├── reversed_words-v0-greedy_until │ │ ├── rte-v0-loglikelihood │ │ ├── sciq-v0-loglikelihood │ │ ├── squad2-v0-greedy_until │ │ ├── squad2-v0-loglikelihood │ │ ├── squad2-v1-greedy_until │ │ ├── squad2-v1-loglikelihood │ │ ├── sst-v0-loglikelihood │ │ ├── swag-v0-loglikelihood │ │ ├── textsynth_test_0a89c2739f9598b4be2674b0a8e43931d7f3f0b696970bcba31f9b52bdf12297.pkl │ │ ├── textsynth_test_0c1c14571add7903b89e588c8212572b95bb57b334fc0752c89a7e045a5f63ae.pkl │ │ ├── textsynth_test_3092d07756f3e1d010c07524cc8a2ecba7f0c19f9e39f2aaf2bf440bfe328004.pkl │ │ ├── textsynth_test_434076260b6af3a46b7a5eaceec3306a5872c400a3872f744280b237455a0f8e.pkl │ │ ├── textsynth_test_49c47ae40e11f349f2f6b492128188b1b2bc103a421c676ee4b2142a68b43516.pkl │ │ ├── textsynth_test_4fd8d66a6dad7f602b40e5d7dc298d6fe329299d086a4659743a41f4a4012659.pkl │ │ ├── textsynth_test_51b5302f157cf224f694ccad973f255ae19e9e061d533256bdf75b04e0a917ab.pkl │ │ ├── textsynth_test_6d6c62dd70caaa208712bf766deaf419cfac89538d4ab7745621e339394c0c23.pkl │ │ ├── textsynth_test_7209c4617547bfe17cb9e7f5f735fe35822d650aefdc5fbeeaf0c1724effbe09.pkl │ │ ├── textsynth_test_7afdc285388e51094e12645f305328c759574fa3ec9751631025f8ad5ebf9f3e.pkl │ │ ├── textsynth_test_9d5f33dbfe1e254928c89f5ed85e4c010d888065f55a8f1b863bc1eb0340a5f2.pkl │ │ ├── textsynth_test_abcbcba648d89e5d81a50511a6d24ddeb538de2ffe108c1370dd74ce6ac8038d.pkl │ │ ├── textsynth_test_b1cbb29666cce5e31a1e97695858137398a0885ca5d5d98f515404fb6aeb99e7.pkl │ │ ├── textsynth_test_e7ad1e9f52a39e1ddd1e50f3c57ffa4546728dd150a67c0a0ddc8675c04e15d1.pkl │ │ ├── textsynth_test_f4bfe4beb605bd52a8ab6be3c9293639e7e2261d98de58159d15ccb83131bf4e.pkl │ │ ├── toxigen-v0-loglikelihood │ │ ├── triviaqa-v0-loglikelihood │ │ ├── triviaqa-v1-loglikelihood │ │ ├── truthfulqa_gen-v0-greedy_until │ │ ├── truthfulqa_gen-v1-greedy_until │ │ ├── truthfulqa_mc-v0-loglikelihood │ │ ├── truthfulqa_mc-v1-loglikelihood │ │ ├── webqs-v0-loglikelihood │ │ ├── wic-v0-loglikelihood │ │ ├── wikitext-v0-loglikelihood_rolling │ │ ├── wikitext-v1-loglikelihood_rolling │ │ ├── wikitext_10_hf_pretrained-EleutherAI-pythia-14m-dtype-float32-device-cpu.txt │ │ ├── winogrande-v0-loglikelihood │ │ ├── wmt14-en-fr-v0-greedy_until │ │ ├── wmt14-fr-en-v0-greedy_until │ │ ├── wmt16-de-en-v0-greedy_until │ │ ├── wmt16-en-de-v0-greedy_until │ │ ├── wmt16-en-ro-v0-greedy_until │ │ ├── wmt16-ro-en-v0-greedy_until │ │ ├── wmt20-cs-en-v0-greedy_until │ │ ├── wmt20-de-en-v0-greedy_until │ │ ├── wmt20-de-fr-v0-greedy_until │ │ ├── wmt20-en-cs-v0-greedy_until │ │ ├── wmt20-en-de-v0-greedy_until │ │ ├── wmt20-en-iu-v0-greedy_until │ │ ├── wmt20-en-ja-v0-greedy_until │ │ ├── wmt20-en-ja-v1-greedy_until │ │ ├── wmt20-en-km-v0-greedy_until │ │ ├── wmt20-en-pl-v0-greedy_until │ │ ├── wmt20-en-ps-v0-greedy_until │ │ ├── wmt20-en-ru-v0-greedy_until │ │ ├── wmt20-en-ta-v0-greedy_until │ │ ├── wmt20-en-zh-v0-greedy_until │ │ ├── wmt20-en-zh-v1-greedy_until │ │ ├── wmt20-fr-de-v0-greedy_until │ │ ├── wmt20-iu-en-v0-greedy_until │ │ ├── wmt20-ja-en-v0-greedy_until │ │ ├── wmt20-km-en-v0-greedy_until │ │ ├── wmt20-pl-en-v0-greedy_until │ │ ├── wmt20-ps-en-v0-greedy_until │ │ ├── wmt20-ru-en-v0-greedy_until │ │ ├── wmt20-ta-en-v0-greedy_until │ │ ├── wmt20-zh-en-v0-greedy_until │ │ ├── wnli-v0-loglikelihood │ │ ├── wnli-v1-loglikelihood │ │ ├── wsc-v0-loglikelihood │ │ └── wsc273-v0-loglikelihood │ │ ├── testyamls │ │ └── test-01.yaml │ │ └── utils.py └── rebase │ ├── inference_scaling │ ├── .gitmodules │ ├── README.md │ ├── evaluate │ │ ├── __init__.py │ │ ├── data_processing │ │ │ ├── answer_extraction.py │ │ │ └── process_utils.py │ │ └── evaluate_utils │ │ │ ├── __init__.py │ │ │ ├── grader.py │ │ │ └── math_normalize.py │ ├── exp_results │ │ ├── .gitignore │ │ ├── combine_answers.py │ │ ├── rebase_1 │ │ │ └── results.txt │ │ └── rebase_16 │ │ │ ├── combine_answers.py │ │ │ └── results.txt │ ├── finetune │ │ ├── README.md │ │ ├── gpt-accelera │ │ │ ├── CODE_OF_CONDUCT.md │ │ │ ├── CONTRIBUTING.md │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── convert_checkpoint_to_hf.py │ │ │ ├── data_utils │ │ │ │ ├── common_utils.py │ │ │ │ ├── data_utils_pm_pairwise.py │ │ │ │ ├── data_utils_rm_pairwise.py │ │ │ │ ├── data_utils_rm_pointwise.py │ │ │ │ ├── data_utils_sft.py │ │ │ │ └── tokenizer_utils.py │ │ │ ├── finetune.py │ │ │ ├── finetune_rm.py │ │ │ ├── finetune_rm_pairwise.py │ │ │ ├── models │ │ │ │ ├── GPTQ.py │ │ │ │ ├── model.py │ │ │ │ ├── ppo_trainer.py │ │ │ │ ├── quantize.py │ │ │ │ ├── reward_model.py │ │ │ │ ├── rl_model.py │ │ │ │ ├── rl_trainer.py │ │ │ │ └── tp.py │ │ │ ├── requirements.txt │ │ │ ├── scripts │ │ │ │ ├── convert_hf_checkpoint.py │ │ │ │ └── download.py │ │ │ ├── scripts_finetune │ │ │ │ ├── convert.sh │ │ │ │ ├── finetune_rm.sh │ │ │ │ ├── finetune_sft.sh │ │ │ │ ├── prepare_llemma_34b.sh │ │ │ │ └── prepare_llemma_7b.sh │ │ │ ├── setup.py │ │ │ ├── training_utils │ │ │ │ ├── checkpoint_hook.py │ │ │ │ ├── fsdp_utils.py │ │ │ │ ├── hf_argparser.py │ │ │ │ ├── memory_efficient_adam.py │ │ │ │ ├── trainer_utils.py │ │ │ │ └── training_args.py │ │ │ └── util.py │ │ └── hf_finetune │ │ │ ├── hf_finetune.sh │ │ │ └── hf_train.py │ ├── hf_score.py │ ├── hype-parameters │ │ ├── rebase.yaml │ │ ├── rebase16.yaml │ │ └── rebase8.yaml │ ├── math_evaluate.py │ ├── rebase.py │ ├── rebase_ori.py │ ├── scripts │ │ ├── run_policy_our.sh │ │ └── run_reward.sh │ └── sgl_baseline.py │ ├── run.sh │ ├── sglang │ ├── LICENSE │ ├── README.md │ ├── assets │ │ ├── llama_7b.jpg │ │ ├── logo.png │ │ ├── logo_square.png │ │ └── mixtral_8x7b.jpg │ ├── benchmark │ │ ├── dspy │ │ │ ├── README.md │ │ │ └── bench_dspy_intro.py │ │ ├── generative_agents │ │ │ ├── README.md │ │ │ ├── agent_functions.py │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── gsm8k │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── hellaswag │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── json_decode_regex │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ └── build_dataset.py │ │ ├── json_jump_forward │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ ├── build_dataset.py │ │ │ └── dataset.txt │ │ ├── latency_throughput │ │ │ ├── README.md │ │ │ ├── bench_throughput.py │ │ │ └── test_latency.py │ │ ├── line_retrieval │ │ │ ├── README.md │ │ │ ├── bench_sglang.py │ │ │ └── gen_data.py │ │ ├── llava_bench │ │ │ ├── README.md │ │ │ ├── bench_hf_llava_bench.sh │ │ │ ├── bench_hf_mme.sh │ │ │ ├── bench_sglang.py │ │ │ ├── bench_sglang_mme.sh │ │ │ ├── download_images.py │ │ │ └── questions.jsonl │ │ ├── llm_judge │ │ │ ├── README.md │ │ │ ├── articles.jsonl │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── long_json_decode │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ └── build_dataset.py │ │ ├── mmlu │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── mtbench │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── multi_chain_reasoning │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── multi_document_qa │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ └── build_dataset.py │ │ ├── multi_turn_chat │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ └── data_gen.py │ │ ├── react │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ ├── bench_sglang.py │ │ │ └── hotpotqa_100.jsonl │ │ ├── tip_suggestion │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ ├── tree_of_thought_deep │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ │ └── tree_of_thought_v0 │ │ │ ├── README.md │ │ │ ├── bench_other.py │ │ │ └── bench_sglang.py │ ├── docs │ │ ├── benchmark_results.md │ │ ├── flashinfer.md │ │ ├── model_support.md │ │ ├── sampling_params.md │ │ └── test_process.md │ ├── examples │ │ ├── quick_start │ │ │ ├── anthropic_example_chat.py │ │ │ ├── anthropic_example_complete.py │ │ │ ├── azure_openai_example_chat.py │ │ │ ├── gemini_example_chat.py │ │ │ ├── gemini_example_complete.py │ │ │ ├── gemini_example_multimodal_chat.py │ │ │ ├── images │ │ │ │ ├── cat.jpeg │ │ │ │ └── dog.jpeg │ │ │ ├── openai_example_chat.py │ │ │ ├── openai_example_complete.py │ │ │ ├── srt_example_chat.py │ │ │ ├── srt_example_complete.py │ │ │ ├── srt_example_llava.py │ │ │ ├── srt_example_yi_vl.py │ │ │ ├── together_example_chat.py │ │ │ └── together_example_complete.py │ │ └── usage │ │ │ ├── async_io.py │ │ │ ├── choices_logprob.py │ │ │ ├── json_decode.py │ │ │ ├── openai_speculative.py │ │ │ ├── parallel_sample.py │ │ │ ├── readme_examples.py │ │ │ ├── streaming.py │ │ │ └── triton │ │ │ ├── Dockerfile │ │ │ ├── README.md │ │ │ └── models │ │ │ └── character_generation │ │ │ ├── 1 │ │ │ └── model.py │ │ │ └── config.pbtxt │ ├── playground │ │ ├── launch_tgi.sh │ │ └── load_tokenizer.py │ ├── python │ │ ├── pyproject.toml │ │ ├── sglang.egg-info │ │ │ ├── PKG-INFO │ │ │ ├── SOURCES.txt │ │ │ ├── dependency_links.txt │ │ │ ├── requires.txt │ │ │ └── top_level.txt │ │ ├── sglang │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── backend │ │ │ │ ├── __init__.py │ │ │ │ ├── anthropic.py │ │ │ │ ├── base_backend.py │ │ │ │ ├── openai.py │ │ │ │ ├── runtime_endpoint.py │ │ │ │ └── vertexai.py │ │ │ ├── global_config.py │ │ │ ├── lang │ │ │ │ ├── __init__.py │ │ │ │ ├── chat_template.py │ │ │ │ ├── compiler.py │ │ │ │ ├── interpreter.py │ │ │ │ ├── ir.py │ │ │ │ └── tracer.py │ │ │ ├── launch_server.py │ │ │ ├── srt │ │ │ │ ├── backend_config.py │ │ │ │ ├── constrained │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_cache.py │ │ │ │ │ ├── fsm_cache.py │ │ │ │ │ └── jump_forward.py │ │ │ │ ├── conversation.py │ │ │ │ ├── hf_transformers_utils.py │ │ │ │ ├── layers │ │ │ │ │ ├── context_flashattention_nopad.py │ │ │ │ │ ├── extend_attention.py │ │ │ │ │ ├── logits_processor.py │ │ │ │ │ ├── radix_attention.py │ │ │ │ │ └── token_attention.py │ │ │ │ ├── managers │ │ │ │ │ ├── detokenizer_manager.py │ │ │ │ │ ├── io_struct.py │ │ │ │ │ ├── openai_protocol.py │ │ │ │ │ ├── router │ │ │ │ │ │ ├── infer_batch.py │ │ │ │ │ │ ├── manager.py │ │ │ │ │ │ ├── model_rpc.py │ │ │ │ │ │ ├── model_runner.py │ │ │ │ │ │ ├── radix_cache.py │ │ │ │ │ │ └── scheduler.py │ │ │ │ │ └── tokenizer_manager.py │ │ │ │ ├── memory_pool.py │ │ │ │ ├── mm_utils.py │ │ │ │ ├── model_config.py │ │ │ │ ├── models │ │ │ │ │ ├── gemma.py │ │ │ │ │ ├── llama2.py │ │ │ │ │ ├── llava.py │ │ │ │ │ ├── mistral.py │ │ │ │ │ ├── mixtral.py │ │ │ │ │ ├── qwen.py │ │ │ │ │ ├── qwen2.py │ │ │ │ │ ├── stablelm.py │ │ │ │ │ └── yivl.py │ │ │ │ ├── sampling_params.py │ │ │ │ ├── server.py │ │ │ │ ├── server_args.py │ │ │ │ └── utils.py │ │ │ ├── test │ │ │ │ ├── test_conversation.py │ │ │ │ ├── test_openai_protocol.py │ │ │ │ ├── test_programs.py │ │ │ │ └── test_utils.py │ │ │ └── utils.py │ │ └── upload_pypi.sh │ ├── scripts │ │ ├── convert_yi_vl.py │ │ ├── convert_yi_vl.sh │ │ ├── format.sh │ │ └── launch_tgi.sh │ └── test │ │ ├── killall_python.sh │ │ ├── lang │ │ ├── run_all.py │ │ ├── test_anthropic_backend.py │ │ ├── test_bind_pin.py │ │ ├── test_openai_backend.py │ │ ├── test_openai_spec.py │ │ ├── test_srt_backend.py │ │ ├── test_tracing.py │ │ └── test_vertexai_backend.py │ │ └── srt │ │ ├── model │ │ ├── bench_llama_low_api.py │ │ ├── reference_hf.py │ │ ├── test_llama_extend.py │ │ ├── test_llama_low_api.py │ │ └── test_llava_low_api.py │ │ ├── test_curl.sh │ │ ├── test_flashinfer.py │ │ ├── test_httpserver_concurrent.py │ │ ├── test_httpserver_decode.py │ │ ├── test_httpserver_decode_stream.py │ │ ├── test_httpserver_llava.py │ │ ├── test_httpserver_reuse.py │ │ ├── test_jump_forward.py │ │ ├── test_openai_server.py │ │ └── test_robust.py │ └── tot │ ├── metric_utils.py │ ├── o1_rebase.py │ ├── o1_rebase_text.py │ └── rebase_utils.py ├── huggingface_token.txt ├── poison └── evaluation │ ├── __init__.py │ ├── constants.py │ ├── eval_sentiment.py │ ├── moderation.py │ ├── pred.py │ └── utils.py ├── requirements.txt ├── script └── safety_alignment │ ├── original.sh │ ├── sft.sh │ ├── sft_cot.sh │ ├── sft_cot_sys.sh │ └── sft_sys.sh ├── thumb.png ├── train ├── fsdp_config_qwen.json ├── fsdp_config_qwen_cpu.json ├── launch.sh ├── sft.py ├── sft_multinode.sh ├── sft_slurm.sh └── trainer.py └── two_stage.png /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/README.md -------------------------------------------------------------------------------- /data/add_aime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/add_aime.py -------------------------------------------------------------------------------- /data/collect_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/collect_data.py -------------------------------------------------------------------------------- /data/construct_limo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/construct_limo.py -------------------------------------------------------------------------------- /data/construct_long_safety_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/construct_long_safety_dataset.py -------------------------------------------------------------------------------- /data/construct_s1k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/construct_s1k.py -------------------------------------------------------------------------------- /data/construct_safety_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/construct_safety_dataset.py -------------------------------------------------------------------------------- /data/decontaminate_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/decontaminate_util.py -------------------------------------------------------------------------------- /data/fix_gpqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/data/fix_gpqa.py -------------------------------------------------------------------------------- /deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /eval/commands.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/commands.sh -------------------------------------------------------------------------------- /eval/compute_sample_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/compute_sample_stats.py -------------------------------------------------------------------------------- /eval/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/generate.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.coveragerc -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.flake8 -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.github/workflows/new_tasks.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.github/workflows/new_tasks.yml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.github/workflows/publish.yml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.github/workflows/unit_tests.yml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.gitignore -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/.pre-commit-config.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/CITATION.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/CITATION.bib -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @haileyschoelkopf @lintangsutawika @baberabb 2 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/LICENSE.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/API_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/API_guide.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/CONTRIBUTING.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/decontamination.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/decontamination.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/img/fewshot_example_gpt3.png -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/interface.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/interface.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/model_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/model_guide.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/new_task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/new_task_guide.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/docs/task_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/docs/task_guide.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/examples/lm-eval-overview.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/examples/lm-eval-overview.ipynb -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/examples/visualize-wandb.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/examples/visualize-wandb.ipynb -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/examples/visualize-zeno.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/examples/visualize-zeno.ipynb -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/ignore.txt: -------------------------------------------------------------------------------- 1 | ROUGE 2 | rouge 3 | nin 4 | maka 5 | mor 6 | te 7 | ond 8 | extraversion 9 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/__main__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/filter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/filter.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/group.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/instance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/instance.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/metrics.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/model.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/registry.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/samplers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/samplers.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/api/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/api/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/caching/cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/caching/cache.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/decontamination/archiver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/decontamination/archiver.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/decontamination/janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/decontamination/janitor.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/evaluator.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/evaluator_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/evaluator_utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/filters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/filters/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/filters/decontamination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/filters/decontamination.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/filters/extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/filters/extraction.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/filters/selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/filters/selection.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/filters/transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/filters/transformation.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/loggers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/loggers/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/loggers/evaluation_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/loggers/evaluation_tracker.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/loggers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/loggers/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/loggers/wandb_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/loggers/wandb_logger.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/anthropic_llms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/anthropic_llms.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/api_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/api_models.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/dummy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/dummy.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/gemini.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/gguf.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/hf_vlms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/hf_vlms.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/huggingface.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/mamba_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/mamba_lm.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/nemo_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/nemo_lm.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/neuralmagic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/neuralmagic.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/neuron_optimum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/neuron_optimum.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/openai_completions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/openai_completions.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/optimum_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/optimum_lm.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/sglang.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/textsynth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/textsynth.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/vllm_causallms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/vllm_causallms.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/models/vllm_vlms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/models/vllm_vlms.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/prompts/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/__init__.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aclue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aclue/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aclue/_aclue.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aclue/_aclue.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aexams/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aexams/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aexams/_aexams.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aexams/_aexams.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/gen_yaml.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/gen_yaml.sh -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/run.sh -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimgsm/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/fewshot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/fewshot.sh -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrimmlu/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrixnli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrixnli/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/utils.py: -------------------------------------------------------------------------------- 1 | from lm_eval.utils import weighted_f1_score 2 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/afrixnli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/afrixnli/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_cn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_cn.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/agieval_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/aqua-rat.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/aqua-rat.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/jec-qa-ca.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/jec-qa-ca.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/jec-qa-kd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/jec-qa-kd.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/logiqa-zh.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-ar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-ar.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-lr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-lr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/lsat-rc.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/math.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/sat-en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/sat-math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/sat-math.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/agieval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/agieval/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aime/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aime/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aime/aime_figures.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aime/aime_figures.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/aime/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/aime/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/anli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/anli/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r1.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r2.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/anli/anli_r3.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arabicmmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arabicmmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arabicmmlu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arabicmmlu/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arc/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arc/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arc/arc_challenge.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arc/arc_easy.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arc_mt/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arc_mt/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/arithmetic/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/asdiv/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/asdiv/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/babi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/babi/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/babi/babi.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basque_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basque_bench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basque_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basque_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/bec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/bec.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/bhtc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/bhtc.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/coref.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/coref.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/qnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/qnli.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/vaxx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/vaxx.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/wic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/basqueglue/wic.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bbh/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bbh/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bbh/fewshot/snarks.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bbh/zeroshot/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/belebele/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/belebele/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/belebele/_belebele.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/belebele/_belebele.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/openllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/openllm.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/pythia.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/benchmarks/t0_eval.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/bertaqa_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/bertaqa_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/bertaqa_eu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bertaqa/bertaqa_eu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/bigbench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/_blimp.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/_template_yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/causative.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/intransitive.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/passive_1.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/transitive.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/blimp/wh_island.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/teca.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/teca.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/catalan_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ceval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ceval/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ceval/_ceval-valid.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ceval/_ceval-valid.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/_cmmlu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/_cmmlu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_arts.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/cmmlu/cmmlu_arts.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/copal_id/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/copal_id/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/copal_id/standard.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/copal_id/standard.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/copal_id/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/copal_id/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/coqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/coqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/coqa/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/coqa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/crows_pairs/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/crows_pairs/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/_csatqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/_csatqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_gr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_li.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rch.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcs.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_rcss.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/csatqa_wr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/csatqa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/drop/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/drop/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/drop/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/drop/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/drop/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eq_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/configs.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_es: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_es -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_eu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/eus_exams_eu -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_exams/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_reading/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_reading/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_reading/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_reading/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_trivia/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_trivia/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/eus_trivia/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/eus_trivia/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fda/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/fda/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fda/fda.yaml: -------------------------------------------------------------------------------- 1 | task: fda 2 | class: !function task.FDA 3 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fda/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/fda/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fld/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/fld/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fld/fld_default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/fld/fld_default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/fld/fld_star.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/fld/fld_star.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/french_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/french_bench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/french_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/french_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/galician_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/galician_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glianorex/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glianorex/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/cola/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/mismatch.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/mnli/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/mrpc/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/qnli/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/qqp/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/rte/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/sst2/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/sst2/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/glue/wnli/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gpqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gpqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gpqa/n_shot/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gpqa/n_shot/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gpqa/openai/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gpqa/openai/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gpqa/zeroshot/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gpqa/zeroshot/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k-cot.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gsm8k/gsm8k.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gsm_plus/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gsm_plus/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/gsm_plus/gsm_plus.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/gsm_plus/gsm_plus.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/_haerae.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/_haerae.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_gk.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_gk.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_hi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_hi.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_lw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_lw.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_rw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_rw.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_sn.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/haerae/haerae_sn.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/headqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/headqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/headqa/headqa_es.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/hellaswag/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/hellaswag/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/hendrycks_math/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/hendrycks_math/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ifeval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ifeval/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ifeval/ifeval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ifeval/ifeval.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ifeval/instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ifeval/instructions.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/ifeval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/ifeval/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kmmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kmmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kobest/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kobest/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kobest/kobest_copa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kobest/kobest_copa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kobest/kobest_wic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kobest/kobest_wic.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kobest/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kobest/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/kormedmcqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/kormedmcqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/lambada/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/lambada/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/lambada_cloze/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/leaderboard/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/leaderboard/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/lingoly/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/lingoly/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/lingoly/script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/lingoly/script.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/lingoly/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/lingoly/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa/logiqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa/utils_logiqa.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/logieval.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/logiqa2/logiqa2.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mathqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mathqa/mathqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mathqa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mc_taco/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mc_taco/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/medmcqa/medmcqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/medmcqa/medmcqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/medqa/medqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/medqa/medqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/_mela.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/_mela.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ar.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_de.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_de.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_es.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_fr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_fr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_is.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_is.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_it.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_it.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ja.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ja.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ru.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_ru.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mela/mela_zh.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/metamathqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/metamathqa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mgsm/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mgsm/direct/direct_yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mgsm/en_cot/cot_yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mgsm/gen_yaml.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mgsm/gen_yaml.sh -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mgsm/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/minerva_math/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/minerva_math/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlu/default/_mmlu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/_mmlu_pro.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/_mmlu_pro.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlu_pro/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlusr/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlusr/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmlusr/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmlusr/config.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_business.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_business.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_mmmu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_mmmu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_science.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_science.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_template_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/_template_yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_art.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_art.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_biology.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_biology.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_design.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_design.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_finance.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_finance.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_history.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_history.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_manage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_manage.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_math.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_music.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_music.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_pharmacy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_pharmacy.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_physics.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/mmmu_physics.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mmmu/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mmmu/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mutual/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mutual/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mutual/mutual.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/mutual/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/noticia/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/noticia/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/noticia/noticia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/noticia/noticia.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/noticia/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/noticia/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/nq_open/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/nq_open/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/nq_open/nq_open.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/openai_math/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/openai_math/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/openbookqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/_paloma_template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/_paloma_template -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_gab.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_gab.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_mc4.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_mc4.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_ptb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_ptb.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paloma/paloma_utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/_pawsx.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/_pawsx.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_de.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_es.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_fr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ja.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_ko.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/paws-x/paws_zh.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_arxiv.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_books3.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_enron.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_europarl.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_freelaw.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_github.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_pile-cc.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile/pile_uspto.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile_10k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile_10k/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pile_10k/pile_10k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pile_10k/pile_10k.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/piqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/piqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/piqa/piqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/polemo2/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/polemo2/polemo2_in.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/prost/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/prost/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pubmedqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/pubmedqa/pubmedqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2011.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2012.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qa4mre/qa4mre_2013.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qasper/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qasper/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qasper/bool.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qasper/freeform.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qasper/metrics.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/qasper/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/race/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/race/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/race/preprocess_race.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/race/preprocess_race.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/race/race.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/race/race.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/sciq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/sciq/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/sciq/sciq.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/scrolls/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/scrolls/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/siqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/siqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/siqa/siqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/siqa/siqa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/spanish_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/spanish_bench/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/spanish_bench/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/spanish_bench/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/squadv2/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/squadv2/squadv2.yaml: -------------------------------------------------------------------------------- 1 | task: squadv2 2 | class: !function task.SQuAD2 3 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/squadv2/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/storycloze/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/super_glue/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/swag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/swag/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/swag/swag.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/swde/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/swde/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/swde/swde.yaml: -------------------------------------------------------------------------------- 1 | task: swde 2 | class: !function task.SWDE 3 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/swde/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/swde/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmlu/default/_tmlu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmlu/default/_tmlu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmlu/default/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmlu/default/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmlu/subject.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmlu/subject.tsv -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmmluplus/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmmluplus/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/tmmluplus/subject.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/tmmluplus/subject.tsv -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/toxigen/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/toxigen/toxigen.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/toxigen/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/translation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/translation/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/translation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/translation/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/triviaqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/triviaqa/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/truthfulqa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/truthfulqa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/turkishmmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/turkishmmlu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/ag_news.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/ag_news.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/atis.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/atis.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/banking77.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/banking77.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/coedit_gec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/coedit_gec.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/dbpedia_14.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/dbpedia_14.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/ledgar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/ledgar.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/stsb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/stsb.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/task.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/task.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/unfair_tos.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/unfair_tos.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/unitxt: -------------------------------------------------------------------------------- 1 | class: !function task.Unitxt 2 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unitxt/xsum.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unitxt/xsum.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/unscramble/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/webqs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/webqs/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/webqs/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/webqs/webqs.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wikitext/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wikitext/wikitext.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/winogrande/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/winogrande/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/winogrande/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmdp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmdp/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmdp/_wmdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmdp/_wmdp.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_bio.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_bio.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_chem.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_chem.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_cyber.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmdp/wmdp_cyber.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmt2016/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wmt2016/metrics.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wsc273/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wsc273/default.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/wsc273/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/_xcopa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/_xcopa.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_et.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ht.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_id.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_it.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_qu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_sw.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_ta.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_th.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_tr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_vi.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/default_zh.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xcopa/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/_xnli.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/_xnli.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ar.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_bg.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_common_yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_de.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_el.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_en.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_es.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_fr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_hi.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ru.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_sw.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_th.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_tr.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_ur.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_vi.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli/xnli_zh.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/xnli_eu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/xnli_eu.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xstorycloze/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xwinograd/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/tasks/xwinograd/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/lm_eval/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/lm_eval/utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/mypy.ini -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/pyproject.toml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/build_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/build_benchmark.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/clean_training_data/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/clean_training_data/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/clean_training_data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/cost_estimate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/cost_estimate.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/get_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/get_prompts.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/make_gpt2_test_cases.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/make_gpt2_test_cases.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/make_table_results.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/make_table_results.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/make_table_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/make_table_tasks.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/model_comparator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/model_comparator.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/regression.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/regression.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/requests_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/requests_caching.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/write_out.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/write_out.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/scripts/zeno_visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/scripts/zeno_visualize.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/setup.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/templates/new_yaml_task/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/templates/new_yaml_task/README.md -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/templates/new_yaml_task/blank_yaml.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_api.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_gguf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_gguf.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_huggingface.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_neuralmagic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_neuralmagic.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_openvino.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_openvino.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/models/test_vllm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/models/test_vllm.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_cli.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_evaluator.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_include_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_include_path.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_janitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_janitor.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_misc.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_prompt.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_requests_caching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_requests_caching.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_task_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_task_manager.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_tasks.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/test_utils.py -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testconfigs/arc_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testconfigs/arc_test.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/boolq-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/boolq-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/boolq-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/boolq-v1-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/cb-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/cb-v1-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/cola-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/copa-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/coqa-v0-greedy_until -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/coqa-v1-greedy_until -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/drop-v0-greedy_until -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/drop-v1-greedy_until -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/gsm8k-v0-greedy_until -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/mnli-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/mrpc-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/piqa-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/prost-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/prost-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/qnli-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testdata/qqp-v0-loglikelihood -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/testyamls/test-01.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/testyamls/test-01.yaml -------------------------------------------------------------------------------- /eval/lm-evaluation-harness/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/lm-evaluation-harness/tests/utils.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/.gitmodules -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/README.md -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/evaluate/evaluate_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/exp_results/.gitignore: -------------------------------------------------------------------------------- 1 | **/*.json 2 | -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/exp_results/combine_answers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/exp_results/combine_answers.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/finetune/README.md -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/gpt-accelera/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/finetune/gpt-accelera/LICENSE -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/gpt-accelera/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/finetune/gpt-accelera/README.md -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/gpt-accelera/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | sentencepiece 3 | -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/gpt-accelera/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/finetune/gpt-accelera/setup.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/finetune/gpt-accelera/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/finetune/gpt-accelera/util.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/hf_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/hf_score.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/hype-parameters/rebase.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/hype-parameters/rebase.yaml -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/hype-parameters/rebase16.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/hype-parameters/rebase16.yaml -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/hype-parameters/rebase8.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/hype-parameters/rebase8.yaml -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/math_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/math_evaluate.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/rebase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/rebase.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/rebase_ori.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/rebase_ori.py -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/scripts/run_policy_our.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/scripts/run_policy_our.sh -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/scripts/run_reward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/scripts/run_reward.sh -------------------------------------------------------------------------------- /eval/rebase/inference_scaling/sgl_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/inference_scaling/sgl_baseline.py -------------------------------------------------------------------------------- /eval/rebase/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/run.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/LICENSE -------------------------------------------------------------------------------- /eval/rebase/sglang/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/assets/llama_7b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/assets/llama_7b.jpg -------------------------------------------------------------------------------- /eval/rebase/sglang/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/assets/logo.png -------------------------------------------------------------------------------- /eval/rebase/sglang/assets/logo_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/assets/logo_square.png -------------------------------------------------------------------------------- /eval/rebase/sglang/assets/mixtral_8x7b.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/assets/mixtral_8x7b.jpg -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/dspy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/dspy/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/dspy/bench_dspy_intro.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/dspy/bench_dspy_intro.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/generative_agents/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/generative_agents/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/generative_agents/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/generative_agents/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/gsm8k/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/gsm8k/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/gsm8k/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/gsm8k/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/gsm8k/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/gsm8k/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/hellaswag/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/hellaswag/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/hellaswag/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/hellaswag/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/hellaswag/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/hellaswag/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/json_decode_regex/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/json_decode_regex/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/json_decode_regex/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/json_decode_regex/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/json_jump_forward/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/json_jump_forward/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/json_jump_forward/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/json_jump_forward/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/json_jump_forward/dataset.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/json_jump_forward/dataset.txt -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/latency_throughput/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/latency_throughput/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/line_retrieval/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/line_retrieval/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/line_retrieval/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/line_retrieval/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/line_retrieval/gen_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/line_retrieval/gen_data.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/bench_hf_mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/bench_hf_mme.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/bench_sglang_mme.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/bench_sglang_mme.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/download_images.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/download_images.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llava_bench/questions.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llava_bench/questions.jsonl -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llm_judge/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llm_judge/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llm_judge/articles.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llm_judge/articles.jsonl -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llm_judge/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llm_judge/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/llm_judge/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/llm_judge/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/long_json_decode/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/long_json_decode/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/long_json_decode/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/long_json_decode/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/long_json_decode/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/long_json_decode/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mmlu/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mmlu/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mmlu/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mmlu/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mmlu/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mmlu/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mtbench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mtbench/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mtbench/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mtbench/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/mtbench/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/mtbench/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_chain_reasoning/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_chain_reasoning/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_document_qa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_document_qa/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_document_qa/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_document_qa/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_turn_chat/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_turn_chat/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_turn_chat/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_turn_chat/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_turn_chat/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_turn_chat/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/multi_turn_chat/data_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/multi_turn_chat/data_gen.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/react/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/react/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/react/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/react/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/react/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/react/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/react/hotpotqa_100.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/react/hotpotqa_100.jsonl -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/tip_suggestion/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/tip_suggestion/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/tip_suggestion/bench_other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/tip_suggestion/bench_other.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/tip_suggestion/bench_sglang.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/tip_suggestion/bench_sglang.py -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/tree_of_thought_deep/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/tree_of_thought_deep/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/benchmark/tree_of_thought_v0/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/benchmark/tree_of_thought_v0/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/docs/benchmark_results.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/docs/benchmark_results.md -------------------------------------------------------------------------------- /eval/rebase/sglang/docs/flashinfer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/docs/flashinfer.md -------------------------------------------------------------------------------- /eval/rebase/sglang/docs/model_support.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/docs/model_support.md -------------------------------------------------------------------------------- /eval/rebase/sglang/docs/sampling_params.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/docs/sampling_params.md -------------------------------------------------------------------------------- /eval/rebase/sglang/docs/test_process.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/docs/test_process.md -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/quick_start/images/cat.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/quick_start/images/cat.jpeg -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/quick_start/images/dog.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/quick_start/images/dog.jpeg -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/quick_start/srt_example_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/quick_start/srt_example_chat.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/quick_start/srt_example_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/quick_start/srt_example_llava.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/quick_start/srt_example_yi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/quick_start/srt_example_yi_vl.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/async_io.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/async_io.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/choices_logprob.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/choices_logprob.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/json_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/json_decode.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/openai_speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/openai_speculative.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/parallel_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/parallel_sample.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/readme_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/readme_examples.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/streaming.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/streaming.py -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/triton/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/triton/Dockerfile -------------------------------------------------------------------------------- /eval/rebase/sglang/examples/usage/triton/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/examples/usage/triton/README.md -------------------------------------------------------------------------------- /eval/rebase/sglang/playground/launch_tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/playground/launch_tgi.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/playground/load_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/playground/load_tokenizer.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/pyproject.toml -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang.egg-info/PKG-INFO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang.egg-info/PKG-INFO -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang.egg-info/SOURCES.txt -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang.egg-info/requires.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang.egg-info/requires.txt -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | sglang 2 | -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/__init__.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/api.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/anthropic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/backend/anthropic.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/base_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/backend/base_backend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/backend/openai.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/runtime_endpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/backend/runtime_endpoint.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/backend/vertexai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/backend/vertexai.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/global_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/global_config.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/chat_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/lang/chat_template.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/compiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/lang/compiler.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/lang/interpreter.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/ir.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/lang/ir.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/lang/tracer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/lang/tracer.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/launch_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/launch_server.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/backend_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/backend_config.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/constrained/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/constrained/__init__.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/constrained/fsm_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/constrained/fsm_cache.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/conversation.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/hf_transformers_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/hf_transformers_utils.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/managers/io_struct.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/managers/io_struct.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/memory_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/memory_pool.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/mm_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/mm_utils.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/model_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/model_config.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/gemma.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/gemma.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/llama2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/llama2.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/llava.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/mistral.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/mixtral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/mixtral.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/qwen.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/qwen2.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/stablelm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/stablelm.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/models/yivl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/models/yivl.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/sampling_params.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/server.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/server_args.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/server_args.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/srt/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/srt/utils.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/test/test_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/test/test_conversation.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/test/test_openai_protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/test/test_openai_protocol.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/test/test_programs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/test/test_programs.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/test/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/test/test_utils.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/sglang/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/sglang/utils.py -------------------------------------------------------------------------------- /eval/rebase/sglang/python/upload_pypi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/python/upload_pypi.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/scripts/convert_yi_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/scripts/convert_yi_vl.py -------------------------------------------------------------------------------- /eval/rebase/sglang/scripts/convert_yi_vl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/scripts/convert_yi_vl.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/scripts/format.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/scripts/launch_tgi.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/scripts/launch_tgi.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/test/killall_python.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/killall_python.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/run_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/run_all.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_anthropic_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_anthropic_backend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_bind_pin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_bind_pin.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_openai_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_openai_backend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_openai_spec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_openai_spec.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_srt_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_srt_backend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_tracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_tracing.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/lang/test_vertexai_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/lang/test_vertexai_backend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/model/bench_llama_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/model/bench_llama_low_api.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/model/reference_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/model/reference_hf.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/model/test_llama_extend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/model/test_llama_extend.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/model/test_llama_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/model/test_llama_low_api.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/model/test_llava_low_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/model/test_llava_low_api.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_curl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_curl.sh -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_flashinfer.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_httpserver_concurrent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_httpserver_concurrent.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_httpserver_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_httpserver_decode.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_httpserver_decode_stream.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_httpserver_decode_stream.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_httpserver_llava.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_httpserver_llava.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_httpserver_reuse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_httpserver_reuse.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_jump_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_jump_forward.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_openai_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_openai_server.py -------------------------------------------------------------------------------- /eval/rebase/sglang/test/srt/test_robust.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/sglang/test/srt/test_robust.py -------------------------------------------------------------------------------- /eval/rebase/tot/metric_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/tot/metric_utils.py -------------------------------------------------------------------------------- /eval/rebase/tot/o1_rebase.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/tot/o1_rebase.py -------------------------------------------------------------------------------- /eval/rebase/tot/o1_rebase_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/tot/o1_rebase_text.py -------------------------------------------------------------------------------- /eval/rebase/tot/rebase_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/eval/rebase/tot/rebase_utils.py -------------------------------------------------------------------------------- /huggingface_token.txt: -------------------------------------------------------------------------------- 1 | xx -------------------------------------------------------------------------------- /poison/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /poison/evaluation/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/poison/evaluation/constants.py -------------------------------------------------------------------------------- /poison/evaluation/eval_sentiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/poison/evaluation/eval_sentiment.py -------------------------------------------------------------------------------- /poison/evaluation/moderation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/poison/evaluation/moderation.py -------------------------------------------------------------------------------- /poison/evaluation/pred.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/poison/evaluation/pred.py -------------------------------------------------------------------------------- /poison/evaluation/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/poison/evaluation/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/requirements.txt -------------------------------------------------------------------------------- /script/safety_alignment/original.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/script/safety_alignment/original.sh -------------------------------------------------------------------------------- /script/safety_alignment/sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/script/safety_alignment/sft.sh -------------------------------------------------------------------------------- /script/safety_alignment/sft_cot.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/script/safety_alignment/sft_cot.sh -------------------------------------------------------------------------------- /script/safety_alignment/sft_cot_sys.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/script/safety_alignment/sft_cot_sys.sh -------------------------------------------------------------------------------- /script/safety_alignment/sft_sys.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/script/safety_alignment/sft_sys.sh -------------------------------------------------------------------------------- /thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/thumb.png -------------------------------------------------------------------------------- /train/fsdp_config_qwen.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/fsdp_config_qwen.json -------------------------------------------------------------------------------- /train/fsdp_config_qwen_cpu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/fsdp_config_qwen_cpu.json -------------------------------------------------------------------------------- /train/launch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/launch.sh -------------------------------------------------------------------------------- /train/sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/sft.py -------------------------------------------------------------------------------- /train/sft_multinode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/sft_multinode.sh -------------------------------------------------------------------------------- /train/sft_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/sft_slurm.sh -------------------------------------------------------------------------------- /train/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/train/trainer.py -------------------------------------------------------------------------------- /two_stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/git-disl/Safety-Tax/HEAD/two_stage.png --------------------------------------------------------------------------------