├── decompose └── __init__.py ├── lm_eval ├── api │ └── __init__.py ├── tasks │ ├── nq_open │ │ └── README.md │ ├── squadv2 │ │ └── squadv2.yaml │ ├── fld │ │ └── fld_star.yaml │ ├── headqa │ │ └── headqa_es.yaml │ ├── pile │ │ ├── pile_enron.yaml │ │ ├── pile_uspto.yaml │ │ ├── pile_books3.yaml │ │ ├── pile_github.yaml │ │ ├── pile_europarl.yaml │ │ ├── pile_freelaw.yaml │ │ ├── pile_pile-cc.yaml │ │ ├── pile_gutenberg.yaml │ │ ├── pile_hackernews.yaml │ │ ├── pile_philpapers.yaml │ │ ├── pile_ubuntu-irc.yaml │ │ ├── pile_wikipedia.yaml │ │ ├── pile_bookcorpus2.yaml │ │ ├── pile_nih-exporter.yaml │ │ ├── pile_opensubtitles.yaml │ │ ├── pile_openwebtext2.yaml │ │ ├── pile_stackexchange.yaml │ │ ├── pile_dm-mathematics.yaml │ │ ├── pile_pubmed-central.yaml │ │ ├── pile_pubmed-abstracts.yaml │ │ └── pile_youtubesubtitles.yaml │ ├── mutual │ │ └── multual_plus.yaml │ ├── scrolls │ │ ├── scrolls_qasper.yaml │ │ ├── scrolls_qmsum.yaml │ │ ├── scrolls_quality.yaml │ │ ├── scrolls_govreport.yaml │ │ ├── scrolls_contractnli.yaml │ │ ├── scrolls_narrativeqa.yaml │ │ └── scrolls_summscreenfd.yaml │ ├── xstorycloze │ │ ├── default_en.yaml │ │ ├── default_es.yaml │ │ ├── default_eu.yaml │ │ ├── default_hi.yaml │ │ ├── default_id.yaml │ │ ├── default_my.yaml │ │ ├── default_ru.yaml │ │ ├── default_sw.yaml │ │ ├── default_te.yaml │ │ └── default_zh.yaml │ ├── arc │ │ └── arc_challenge.yaml │ ├── csatqa │ │ ├── csatqa_gr.yaml │ │ ├── csatqa_li.yaml │ │ ├── csatqa_wr.yaml │ │ ├── csatqa_rch.yaml │ │ ├── csatqa_rcs.yaml │ │ └── csatqa_rcss.yaml │ ├── kmmlu │ │ ├── hard │ │ │ ├── kmmlu_hard_law.yaml │ │ │ ├── kmmlu_hard_math.yaml │ │ │ ├── kmmlu_hard_biology.yaml │ │ │ ├── kmmlu_hard_ecology.yaml │ │ │ ├── kmmlu_hard_fashion.yaml │ │ │ ├── kmmlu_hard_health.yaml │ │ │ ├── kmmlu_hard_patent.yaml │ │ │ ├── kmmlu_hard_taxation.yaml │ │ │ ├── kmmlu_hard_accounting.yaml │ │ │ ├── kmmlu_hard_chemistry.yaml │ │ │ ├── kmmlu_hard_economics.yaml │ │ │ ├── kmmlu_hard_education.yaml │ │ │ ├── kmmlu_hard_geomatics.yaml │ │ │ ├── kmmlu_hard_management.yaml │ │ │ ├── kmmlu_hard_marketing.yaml │ │ │ ├── kmmlu_hard_psychology.yaml │ │ │ ├── kmmlu_hard_construction.yaml │ │ │ ├── kmmlu_hard_criminal_law.yaml │ │ │ ├── kmmlu_hard_real_estate.yaml │ │ │ ├── kmmlu_hard_public_safety.yaml │ │ │ ├── kmmlu_hard_food_processing.yaml │ │ │ ├── kmmlu_hard_korean_history.yaml │ │ │ ├── kmmlu_hard_social_welfare.yaml │ │ │ ├── kmmlu_hard_civil_engineering.yaml │ │ │ ├── kmmlu_hard_computer_science.yaml │ │ │ ├── kmmlu_hard_energy_management.yaml │ │ │ ├── kmmlu_hard_chemical_engineering.yaml │ │ │ ├── kmmlu_hard_industrial_engineer.yaml │ │ │ ├── kmmlu_hard_maritime_engineering.yaml │ │ │ ├── kmmlu_hard_agricultural_sciences.yaml │ │ │ ├── kmmlu_hard_electrical_engineering.yaml │ │ │ ├── kmmlu_hard_environmental_science.yaml │ │ │ ├── kmmlu_hard_information_technology.yaml │ │ │ ├── kmmlu_hard_materials_engineering.yaml │ │ │ ├── kmmlu_hard_mechanical_engineering.yaml │ │ │ ├── kmmlu_hard_nondestructive_testing.yaml │ │ │ ├── kmmlu_hard_electronics_engineering.yaml │ │ │ ├── kmmlu_hard_refrigerating_machinery.yaml │ │ │ ├── kmmlu_hard_gas_technology_and_engineering.yaml │ │ │ ├── kmmlu_hard_interior_architecture_and_design.yaml │ │ │ ├── kmmlu_hard_machine_design_and_manufacturing.yaml │ │ │ ├── kmmlu_hard_political_science_and_sociology.yaml │ │ │ └── kmmlu_hard_railway_and_automotive_engineering.yaml │ │ ├── direct │ │ │ ├── kmmlu_direct_law.yaml │ │ │ ├── kmmlu_direct_math.yaml │ │ │ ├── kmmlu_direct_health.yaml │ │ │ ├── kmmlu_direct_patent.yaml │ │ │ ├── kmmlu_direct_biology.yaml │ │ │ ├── kmmlu_direct_ecology.yaml │ │ │ ├── kmmlu_direct_fashion.yaml │ │ │ ├── kmmlu_direct_chemistry.yaml │ │ │ ├── kmmlu_direct_economics.yaml │ │ │ ├── kmmlu_direct_education.yaml │ │ │ ├── kmmlu_direct_geomatics.yaml │ │ │ ├── kmmlu_direct_marketing.yaml │ │ │ ├── kmmlu_direct_taxation.yaml │ │ │ ├── kmmlu_direct_accounting.yaml │ │ │ ├── kmmlu_direct_management.yaml │ │ │ ├── kmmlu_direct_psychology.yaml │ │ │ ├── kmmlu_direct_real_estate.yaml │ │ │ ├── kmmlu_direct_construction.yaml │ │ │ ├── kmmlu_direct_criminal_law.yaml │ │ │ ├── kmmlu_direct_korean_history.yaml │ │ │ ├── kmmlu_direct_public_safety.yaml │ │ │ ├── kmmlu_direct_social_welfare.yaml │ │ │ ├── kmmlu_direct_computer_science.yaml │ │ │ ├── kmmlu_direct_food_processing.yaml │ │ │ ├── kmmlu_direct_civil_engineering.yaml │ │ │ ├── kmmlu_direct_energy_management.yaml │ │ │ ├── kmmlu_direct_industrial_engineer.yaml │ │ │ ├── kmmlu_direct_agricultural_sciences.yaml │ │ │ ├── kmmlu_direct_chemical_engineering.yaml │ │ │ ├── kmmlu_direct_environmental_science.yaml │ │ │ ├── kmmlu_direct_maritime_engineering.yaml │ │ │ ├── kmmlu_direct_materials_engineering.yaml │ │ │ ├── kmmlu_direct_electrical_engineering.yaml │ │ │ ├── kmmlu_direct_information_technology.yaml │ │ │ ├── kmmlu_direct_mechanical_engineering.yaml │ │ │ ├── kmmlu_direct_nondestructive_testing.yaml │ │ │ ├── kmmlu_direct_electronics_engineering.yaml │ │ │ ├── kmmlu_direct_refrigerating_machinery.yaml │ │ │ ├── kmmlu_direct_gas_technology_and_engineering.yaml │ │ │ ├── kmmlu_direct_political_science_and_sociology.yaml │ │ │ ├── kmmlu_direct_interior_architecture_and_design.yaml │ │ │ └── kmmlu_direct_machine_design_and_manufacturing.yaml │ │ └── direct_hard │ │ │ ├── kmmlu_direct_hard_law.yaml │ │ │ ├── kmmlu_direct_hard_math.yaml │ │ │ ├── kmmlu_direct_hard_health.yaml │ │ │ ├── kmmlu_direct_hard_patent.yaml │ │ │ ├── kmmlu_direct_hard_biology.yaml │ │ │ ├── kmmlu_direct_hard_ecology.yaml │ │ │ ├── kmmlu_direct_hard_fashion.yaml │ │ │ ├── kmmlu_direct_hard_chemistry.yaml │ │ │ ├── kmmlu_direct_hard_economics.yaml │ │ │ ├── kmmlu_direct_hard_education.yaml │ │ │ ├── kmmlu_direct_hard_geomatics.yaml │ │ │ ├── kmmlu_direct_hard_marketing.yaml │ │ │ ├── kmmlu_direct_hard_taxation.yaml │ │ │ ├── kmmlu_direct_hard_accounting.yaml │ │ │ ├── kmmlu_direct_hard_management.yaml │ │ │ ├── kmmlu_direct_hard_psychology.yaml │ │ │ ├── kmmlu_direct_hard_real_estate.yaml │ │ │ ├── kmmlu_direct_hard_construction.yaml │ │ │ ├── kmmlu_direct_hard_criminal_law.yaml │ │ │ ├── kmmlu_direct_hard_korean_history.yaml │ │ │ ├── kmmlu_direct_hard_public_safety.yaml │ │ │ ├── kmmlu_direct_hard_social_welfare.yaml │ │ │ ├── kmmlu_direct_hard_computer_science.yaml │ │ │ ├── kmmlu_direct_hard_food_processing.yaml │ │ │ ├── kmmlu_direct_hard_civil_engineering.yaml │ │ │ ├── kmmlu_direct_hard_energy_management.yaml │ │ │ ├── kmmlu_direct_hard_industrial_engineer.yaml │ │ │ ├── kmmlu_direct_hard_agricultural_sciences.yaml │ │ │ ├── kmmlu_direct_hard_chemical_engineering.yaml │ │ │ ├── kmmlu_direct_hard_environmental_science.yaml │ │ │ ├── kmmlu_direct_hard_maritime_engineering.yaml │ │ │ ├── kmmlu_direct_hard_materials_engineering.yaml │ │ │ ├── kmmlu_direct_hard_electrical_engineering.yaml │ │ │ ├── kmmlu_direct_hard_information_technology.yaml │ │ │ ├── kmmlu_direct_hard_mechanical_engineering.yaml │ │ │ ├── kmmlu_direct_hard_nondestructive_testing.yaml │ │ │ ├── kmmlu_direct_hard_electronics_engineering.yaml │ │ │ └── kmmlu_direct_hard_refrigerating_machinery.yaml │ ├── glue │ │ └── mnli │ │ │ └── mismatch.yaml │ ├── haerae │ │ ├── haerae_hi.yaml │ │ ├── haerae_lw.yaml │ │ ├── haerae_rw.yaml │ │ ├── haerae_gk.yaml │ │ └── haerae_sn.yaml │ ├── french_bench │ │ └── _default_template_yaml │ ├── lambada_multilingual │ │ ├── lambada_mt_de.yaml │ │ ├── lambada_mt_es.yaml │ │ ├── lambada_mt_fr.yaml │ │ └── lambada_mt_it.yaml │ ├── xnli_eu │ │ ├── xnli_eu_mt.yaml │ │ └── xnli_eu_native.yaml │ ├── crows_pairs │ │ ├── crows_pairs_french.yaml │ │ ├── crows_pairs_english_age.yaml │ │ ├── crows_pairs_french_age.yaml │ │ ├── crows_pairs_english_autre.yaml │ │ ├── crows_pairs_french_autre.yaml │ │ ├── crows_pairs_french_gender.yaml │ │ ├── crows_pairs_english_gender.yaml │ │ ├── crows_pairs_english_religion.yaml │ │ ├── crows_pairs_french_religion.yaml │ │ ├── crows_pairs_english_disability.yaml │ │ ├── crows_pairs_english_race_color.yaml │ │ ├── crows_pairs_french_disability.yaml │ │ ├── crows_pairs_french_nationality.yaml │ │ ├── crows_pairs_french_race_color.yaml │ │ └── crows_pairs_french_socioeconomic.yaml │ ├── blimp │ │ ├── causative.yaml │ │ ├── inchoative.yaml │ │ ├── passive_1.yaml │ │ ├── passive_2.yaml │ │ ├── transitive.yaml │ │ ├── wh_island.yaml │ │ ├── intransitive.yaml │ │ ├── drop_argument.yaml │ │ ├── npi_present_1.yaml │ │ ├── npi_present_2.yaml │ │ ├── adjunct_island.yaml │ │ ├── only_npi_scope.yaml │ │ ├── complex_NP_island.yaml │ │ ├── ellipsis_n_bar_1.yaml │ │ ├── ellipsis_n_bar_2.yaml │ │ ├── wh_vs_that_no_gap.yaml │ │ ├── principle_A_case_1.yaml │ │ ├── principle_A_case_2.yaml │ │ ├── tough_vs_raising_1.yaml │ │ ├── tough_vs_raising_2.yaml │ │ ├── principle_A_domain_1.yaml │ │ ├── principle_A_domain_2.yaml │ │ ├── principle_A_domain_3.yaml │ │ ├── wh_vs_that_with_gap.yaml │ │ ├── animate_subject_trans.yaml │ │ ├── principle_A_c_command.yaml │ │ ├── animate_subject_passive.yaml │ │ ├── wh_questions_object_gap.yaml │ │ ├── anaphor_gender_agreement.yaml │ │ ├── anaphor_number_agreement.yaml │ │ ├── only_npi_licensor_present.yaml │ │ ├── sentential_subject_island.yaml │ │ ├── superlative_quantifiers_1.yaml │ │ ├── superlative_quantifiers_2.yaml │ │ ├── wh_questions_subject_gap.yaml │ │ ├── determiner_noun_agreement_1.yaml │ │ ├── determiner_noun_agreement_2.yaml │ │ ├── expletive_it_object_raising.yaml │ │ ├── principle_A_reconstruction.yaml │ │ ├── sentential_negation_npi_scope.yaml │ │ ├── existential_there_object_raising.yaml │ │ ├── existential_there_quantifiers_1.yaml │ │ ├── existential_there_quantifiers_2.yaml │ │ ├── irregular_past_participle_verbs.yaml │ │ ├── left_branch_island_echo_question.yaml │ │ ├── wh_vs_that_no_gap_long_distance.yaml │ │ ├── existential_there_subject_raising.yaml │ │ └── wh_vs_that_with_gap_long_distance.yaml │ ├── qa4mre │ │ ├── qa4mre_2012.yaml │ │ └── qa4mre_2013.yaml │ ├── xwinograd │ │ ├── xwinograd_en.yaml │ │ ├── xwinograd_fr.yaml │ │ ├── xwinograd_jp.yaml │ │ ├── xwinograd_pt.yaml │ │ ├── xwinograd_ru.yaml │ │ └── xwinograd_zh.yaml │ ├── minerva_math │ │ ├── minerva_math_geometry.yaml │ │ ├── minerva_math_precalc.yaml │ │ ├── minerva_math_prealgebra.yaml │ │ ├── minerva_math_num_theory.yaml │ │ ├── minerva_math_counting_and_prob.yaml │ │ └── minerva_math_intermediate_algebra.yaml │ ├── mmlu │ │ └── default │ │ │ └── _mmlu.yaml │ ├── xcopa │ │ ├── default_ht.yaml │ │ ├── default_id.yaml │ │ ├── default_it.yaml │ │ ├── default_qu.yaml │ │ ├── default_sw.yaml │ │ ├── default_ta.yaml │ │ ├── default_th.yaml │ │ ├── default_tr.yaml │ │ ├── default_vi.yaml │ │ └── default_zh.yaml │ ├── anli │ │ ├── anli_r2.yaml │ │ └── anli_r3.yaml │ ├── okapi │ │ └── mmlu_multilingual │ │ │ ├── m_mmlu_ar.yaml │ │ │ ├── m_mmlu_bn.yaml │ │ │ ├── m_mmlu_ca.yaml │ │ │ ├── m_mmlu_da.yaml │ │ │ ├── m_mmlu_de.yaml │ │ │ ├── m_mmlu_en.yaml │ │ │ ├── m_mmlu_es.yaml │ │ │ ├── m_mmlu_eu.yaml │ │ │ ├── m_mmlu_fr.yaml │ │ │ ├── m_mmlu_gu.yaml │ │ │ ├── m_mmlu_hi.yaml │ │ │ ├── m_mmlu_hr.yaml │ │ │ ├── m_mmlu_hu.yaml │ │ │ ├── m_mmlu_hy.yaml │ │ │ ├── m_mmlu_id.yaml │ │ │ ├── m_mmlu_is.yaml │ │ │ ├── m_mmlu_it.yaml │ │ │ ├── m_mmlu_kn.yaml │ │ │ ├── m_mmlu_ml.yaml │ │ │ ├── m_mmlu_mr.yaml │ │ │ ├── m_mmlu_nb.yaml │ │ │ ├── m_mmlu_ne.yaml │ │ │ ├── m_mmlu_nl.yaml │ │ │ ├── m_mmlu_pt.yaml │ │ │ ├── m_mmlu_ro.yaml │ │ │ ├── m_mmlu_ru.yaml │ │ │ ├── m_mmlu_sk.yaml │ │ │ ├── m_mmlu_sr.yaml │ │ │ ├── m_mmlu_sv.yaml │ │ │ ├── m_mmlu_ta.yaml │ │ │ ├── m_mmlu_te.yaml │ │ │ ├── m_mmlu_uk.yaml │ │ │ ├── m_mmlu_vi.yaml │ │ │ └── m_mmlu_zh.yaml │ ├── polemo2 │ │ └── polemo2_out.yaml │ ├── eus_exams │ │ ├── eus_exams_es_ejauxiliar.yaml │ │ ├── eus_exams_es_ejtecnico.yaml │ │ ├── eus_exams_es_opebilbao.yaml │ │ ├── eus_exams_es_opeehuaux.yaml │ │ ├── eus_exams_eu_ejlaguntza.yaml │ │ ├── eus_exams_es_ejsubalterno.yaml │ │ ├── eus_exams_es_opeehuadmin.yaml │ │ ├── eus_exams_es_opeehubiblio.yaml │ │ ├── eus_exams_es_opeosakiaux.yaml │ │ ├── eus_exams_es_opeosakienf.yaml │ │ ├── eus_exams_es_osakidetza1c.yaml │ │ ├── eus_exams_es_osakidetza2c.yaml │ │ ├── eus_exams_es_osakidetza3c.yaml │ │ ├── eus_exams_es_osakidetza4c.yaml │ │ ├── eus_exams_es_osakidetza5c.yaml │ │ ├── eus_exams_es_osakidetza6c.yaml │ │ ├── eus_exams_es_osakidetza7c.yaml │ │ ├── eus_exams_es_osakidetza8c.yaml │ │ ├── eus_exams_es_osakidetza9c.yaml │ │ ├── eus_exams_eu_ejteknikari.yaml │ │ ├── eus_exams_eu_opebilbaoeu.yaml │ │ ├── eus_exams_eu_opeehuauxeu.yaml │ │ ├── eus_exams_eu_osakidetza1e.yaml │ │ ├── eus_exams_eu_osakidetza2e.yaml │ │ ├── eus_exams_eu_osakidetza3e.yaml │ │ ├── eus_exams_eu_osakidetza5e.yaml │ │ ├── eus_exams_eu_osakidetza6e.yaml │ │ ├── eus_exams_eu_osakidetza7e.yaml │ │ ├── eus_exams_es_opeehuderecho.yaml │ │ ├── eus_exams_es_opeehutecnico.yaml │ │ ├── eus_exams_es_opeosakiadmin.yaml │ │ ├── eus_exams_eu_ejlaguntzaile.yaml │ │ ├── eus_exams_eu_opeehuadmineu.yaml │ │ ├── eus_exams_eu_opeosakiauxeu.yaml │ │ ├── eus_exams_eu_opeosakienfeu.yaml │ │ ├── eus_exams_es_opeehutecnicob.yaml │ │ ├── eus_exams_es_opeosakiauxenf.yaml │ │ ├── eus_exams_es_opeosakicelador.yaml │ │ ├── eus_exams_es_opeosakitecnico.yaml │ │ ├── eus_exams_es_opeosakivarios.yaml │ │ ├── eus_exams_eu_ejadministrari.yaml │ │ ├── eus_exams_eu_opeehubiblioeu.yaml │ │ ├── eus_exams_eu_opeehuderechoeu.yaml │ │ ├── eus_exams_eu_opeehutecnicoeu.yaml │ │ ├── eus_exams_eu_opeosakiadmineu.yaml │ │ ├── eus_exams_es_ejadministrativo.yaml │ │ ├── eus_exams_es_opeehueconomicas.yaml │ │ ├── eus_exams_es_opeehusubalterno.yaml │ │ ├── eus_exams_es_opeosakijuridico.yaml │ │ ├── eus_exams_es_opeosakioperario.yaml │ │ ├── eus_exams_eu_opeehuteknikarib.yaml │ │ ├── eus_exams_eu_opegasteizkoudala.yaml │ │ ├── eus_exams_eu_opeosakiauxenfeu.yaml │ │ ├── eus_exams_eu_opeosakiceladoreu.yaml │ │ ├── eus_exams_eu_opeosakitecnicoeu.yaml │ │ ├── eus_exams_eu_opeosakivarioseu.yaml │ │ ├── eus_exams_eu_opeehueconomicaseu.yaml │ │ ├── eus_exams_eu_opeehusubalternoeu.yaml │ │ ├── eus_exams_eu_opeosakioperarioeu.yaml │ │ ├── eus_exams_es_opeehuempresariales.yaml │ │ ├── eus_exams_es_opeayuntamientovitoria.yaml │ │ └── eus_exams_eu_opeehuempresarialeseu.yaml │ ├── gpqa │ │ ├── n_shot │ │ │ ├── gpqa_main_n_shot.yaml │ │ │ ├── gpqa_diamond_n_shot.yaml │ │ │ └── gpqa_extended_n_shot.yaml │ │ ├── zeroshot │ │ │ ├── gpqa_main_zeroshot.yaml │ │ │ ├── gpqa_diamond_zeroshot.yaml │ │ │ └── gpqa_extended_zeroshot.yaml │ │ ├── cot_n_shot │ │ │ ├── gpqa_main_cot_n_shot.yaml │ │ │ ├── gpqa_diamond_cot_n_shot.yaml │ │ │ └── gpqa_extended_cot_n_shot.yaml │ │ ├── cot_zeroshot │ │ │ ├── gpqa_main_cot_zeroshot.yaml │ │ │ ├── gpqa_diamond_cot_zeroshot.yaml │ │ │ └── gpqa_extended_cot_zeroshot.yaml │ │ └── generative │ │ │ └── gpqa_main_generative_n_shot.yaml │ ├── belebele │ │ ├── belebele_acm_Arab.yaml │ │ ├── belebele_afr_Latn.yaml │ │ ├── belebele_als_Latn.yaml │ │ ├── belebele_amh_Ethi.yaml │ │ ├── belebele_apc_Arab.yaml │ │ ├── belebele_arb_Arab.yaml │ │ ├── belebele_arb_Latn.yaml │ │ ├── belebele_ars_Arab.yaml │ │ ├── belebele_ary_Arab.yaml │ │ ├── belebele_arz_Arab.yaml │ │ ├── belebele_asm_Beng.yaml │ │ ├── belebele_azj_Latn.yaml │ │ ├── belebele_bam_Latn.yaml │ │ ├── belebele_ben_Beng.yaml │ │ ├── belebele_ben_Latn.yaml │ │ ├── belebele_bod_Tibt.yaml │ │ ├── belebele_bul_Cyrl.yaml │ │ ├── belebele_cat_Latn.yaml │ │ ├── belebele_ceb_Latn.yaml │ │ ├── belebele_ces_Latn.yaml │ │ ├── belebele_ckb_Arab.yaml │ │ ├── belebele_dan_Latn.yaml │ │ ├── belebele_deu_Latn.yaml │ │ ├── belebele_ell_Grek.yaml │ │ ├── belebele_eng_Latn.yaml │ │ ├── belebele_est_Latn.yaml │ │ ├── belebele_eus_Latn.yaml │ │ ├── belebele_fin_Latn.yaml │ │ ├── belebele_fra_Latn.yaml │ │ ├── belebele_fuv_Latn.yaml │ │ ├── belebele_gaz_Latn.yaml │ │ ├── belebele_grn_Latn.yaml │ │ ├── belebele_guj_Gujr.yaml │ │ ├── belebele_hat_Latn.yaml │ │ ├── belebele_hau_Latn.yaml │ │ ├── belebele_heb_Hebr.yaml │ │ ├── belebele_hin_Deva.yaml │ │ ├── belebele_hin_Latn.yaml │ │ ├── belebele_hrv_Latn.yaml │ │ ├── belebele_hun_Latn.yaml │ │ ├── belebele_hye_Armn.yaml │ │ ├── belebele_ibo_Latn.yaml │ │ ├── belebele_ilo_Latn.yaml │ │ ├── belebele_ind_Latn.yaml │ │ ├── belebele_isl_Latn.yaml │ │ ├── belebele_ita_Latn.yaml │ │ ├── belebele_jav_Latn.yaml │ │ ├── belebele_jpn_Jpan.yaml │ │ ├── belebele_kac_Latn.yaml │ │ ├── belebele_kan_Knda.yaml │ │ ├── belebele_kat_Geor.yaml │ │ ├── belebele_kaz_Cyrl.yaml │ │ ├── belebele_kea_Latn.yaml │ │ ├── belebele_khk_Cyrl.yaml │ │ ├── belebele_khm_Khmr.yaml │ │ ├── belebele_kin_Latn.yaml │ │ ├── belebele_kir_Cyrl.yaml │ │ ├── belebele_kor_Hang.yaml │ │ ├── belebele_lao_Laoo.yaml │ │ ├── belebele_lin_Latn.yaml │ │ ├── belebele_lit_Latn.yaml │ │ ├── belebele_lug_Latn.yaml │ │ ├── belebele_luo_Latn.yaml │ │ ├── belebele_lvs_Latn.yaml │ │ ├── belebele_mal_Mlym.yaml │ │ ├── belebele_mar_Deva.yaml │ │ ├── belebele_mkd_Cyrl.yaml │ │ ├── belebele_mlt_Latn.yaml │ │ ├── belebele_mri_Latn.yaml │ │ ├── belebele_mya_Mymr.yaml │ │ ├── belebele_nld_Latn.yaml │ │ ├── belebele_nob_Latn.yaml │ │ ├── belebele_npi_Deva.yaml │ │ ├── belebele_npi_Latn.yaml │ │ ├── belebele_nso_Latn.yaml │ │ ├── belebele_nya_Latn.yaml │ │ ├── belebele_ory_Orya.yaml │ │ ├── belebele_pan_Guru.yaml │ │ ├── belebele_pbt_Arab.yaml │ │ ├── belebele_pes_Arab.yaml │ │ ├── belebele_plt_Latn.yaml │ │ ├── belebele_pol_Latn.yaml │ │ ├── belebele_por_Latn.yaml │ │ ├── belebele_ron_Latn.yaml │ │ ├── belebele_rus_Cyrl.yaml │ │ ├── belebele_shn_Mymr.yaml │ │ ├── belebele_sin_Latn.yaml │ │ ├── belebele_sin_Sinh.yaml │ │ ├── belebele_slk_Latn.yaml │ │ ├── belebele_slv_Latn.yaml │ │ ├── belebele_sna_Latn.yaml │ │ ├── belebele_snd_Arab.yaml │ │ ├── belebele_som_Latn.yaml │ │ ├── belebele_sot_Latn.yaml │ │ ├── belebele_spa_Latn.yaml │ │ ├── belebele_srp_Cyrl.yaml │ │ ├── belebele_ssw_Latn.yaml │ │ ├── belebele_sun_Latn.yaml │ │ ├── belebele_swe_Latn.yaml │ │ ├── belebele_swh_Latn.yaml │ │ ├── belebele_tam_Taml.yaml │ │ ├── belebele_tel_Telu.yaml │ │ ├── belebele_tgk_Cyrl.yaml │ │ ├── belebele_tgl_Latn.yaml │ │ ├── belebele_tha_Thai.yaml │ │ ├── belebele_tir_Ethi.yaml │ │ ├── belebele_tsn_Latn.yaml │ │ ├── belebele_tso_Latn.yaml │ │ ├── belebele_tur_Latn.yaml │ │ ├── belebele_ukr_Cyrl.yaml │ │ ├── belebele_urd_Arab.yaml │ │ ├── belebele_urd_Latn.yaml │ │ ├── belebele_uzn_Latn.yaml │ │ ├── belebele_vie_Latn.yaml │ │ ├── belebele_war_Latn.yaml │ │ ├── belebele_wol_Latn.yaml │ │ ├── belebele_xho_Latn.yaml │ │ ├── belebele_yor_Latn.yaml │ │ ├── belebele_zho_Hans.yaml │ │ ├── belebele_zho_Hant.yaml │ │ ├── belebele_zsm_Latn.yaml │ │ └── belebele_zul_Latn.yaml │ ├── model_written_evals │ │ └── persona │ │ │ ├── openness.yaml │ │ │ ├── narcissism.yaml │ │ │ ├── neuroticism.yaml │ │ │ ├── psychopathy.yaml │ │ │ ├── risk-averse.yaml │ │ │ ├── agreeableness.yaml │ │ │ ├── extraversion.yaml │ │ │ ├── no-shut-down.yaml │ │ │ ├── risk-neutral.yaml │ │ │ ├── risk-seeking.yaml │ │ │ ├── has-disability.yaml │ │ │ ├── interest-in-art.yaml │ │ │ ├── no-goal-change.yaml │ │ │ ├── anti-immigration.yaml │ │ │ ├── interest-in-math.yaml │ │ │ ├── machiavellianism.yaml │ │ │ ├── self-replication.yaml │ │ │ ├── anti-LGBTQ-rights.yaml │ │ │ ├── conscientiousness.yaml │ │ │ ├── ends-justify-means.yaml │ │ │ ├── high-discount-rate.yaml │ │ │ ├── interest-in-music.yaml │ │ │ ├── interest-in-sports.yaml │ │ │ ├── low-discount-rate.yaml │ │ │ ├── stands-its-ground.yaml │ │ │ ├── high-discount-factor.yaml │ │ │ ├── interest-in-science.yaml │ │ │ ├── low-discount-factor.yaml │ │ │ ├── no-power-discomfort.yaml │ │ │ ├── politically-liberal.yaml │ │ │ ├── resource-acquisition.yaml │ │ │ ├── subscribes-to-Islam.yaml │ │ │ ├── subscribes-to-Taoism.yaml │ │ │ ├── cognitive-enhancement.yaml │ │ │ ├── desire-for-popularity.yaml │ │ │ ├── desire-for-wide-usage.yaml │ │ │ ├── subscribes-to-Atheism.yaml │ │ │ └── subscribes-to-Judaism.yaml │ ├── agieval │ │ ├── jec-qa-ca.yaml │ │ ├── jec-qa-kd.yaml │ │ ├── logiqa-zh.yaml │ │ ├── gaokao-mathqa.yaml │ │ ├── sat-en.yaml │ │ ├── gaokao-biology.yaml │ │ ├── gaokao-chinese.yaml │ │ ├── gaokao-history.yaml │ │ ├── gaokao-physics.yaml │ │ ├── lsat-ar.yaml │ │ ├── lsat-lr.yaml │ │ ├── lsat-rc.yaml │ │ ├── sat-math.yaml │ │ ├── gaokao-chemistry.yaml │ │ ├── gaokao-geography.yaml │ │ └── logiqa-en.yaml │ ├── tmmluplus │ │ └── default │ │ │ └── tmmluplus.yaml │ ├── arithmetic │ │ ├── arithmetic_2da.yaml │ │ ├── arithmetic_2dm.yaml │ │ ├── arithmetic_2ds.yaml │ │ ├── arithmetic_3da.yaml │ │ ├── arithmetic_3ds.yaml │ │ ├── arithmetic_4da.yaml │ │ ├── arithmetic_4ds.yaml │ │ ├── arithmetic_5da.yaml │ │ └── arithmetic_5ds.yaml │ ├── ceval │ │ ├── ceval-valid_law.yaml │ │ ├── ceval-valid_logic.yaml │ │ ├── ceval-valid_marxism.yaml │ │ ├── ceval-valid_physician.yaml │ │ ├── ceval-valid_accountant.yaml │ │ ├── ceval-valid_art_studies.yaml │ │ └── ceval-valid_civil_servant.yaml │ ├── bigbench │ │ ├── generate_until │ │ │ ├── gem.yaml │ │ │ ├── color.yaml │ │ │ ├── snarks.yaml │ │ │ ├── tense.yaml │ │ │ ├── kannada.yaml │ │ │ ├── physics.yaml │ │ │ ├── winowhy.yaml │ │ │ ├── codenames.yaml │ │ │ ├── crass_ai.yaml │ │ │ ├── disfl_qa.yaml │ │ │ ├── multiemo.yaml │ │ │ ├── navigate.yaml │ │ │ ├── operators.yaml │ │ │ ├── rephrase.yaml │ │ │ ├── timedial.yaml │ │ │ ├── arithmetic.yaml │ │ │ ├── cryptonite.yaml │ │ │ ├── emoji_movie.yaml │ │ │ ├── hyperbaton.yaml │ │ │ ├── kanji_ascii.yaml │ │ │ ├── mnist_ascii.yaml │ │ │ ├── odd_one_out.yaml │ │ │ ├── parsinlu_qa.yaml │ │ │ ├── qa_wikidata.yaml │ │ │ ├── ruin_names.yaml │ │ │ ├── social_iqa.yaml │ │ │ ├── strategyqa.yaml │ │ │ ├── anachronisms.yaml │ │ │ ├── fact_checker.yaml │ │ │ ├── few_shot_nlg.yaml │ │ │ ├── implicatures.yaml │ │ │ ├── logical_args.yaml │ │ │ ├── matrixshapes.yaml │ │ │ ├── riddle_sense.yaml │ │ │ ├── suicide_risk.yaml │ │ │ ├── topical_chat.yaml │ │ │ ├── word_sorting.yaml │ │ │ ├── auto_debugging.yaml │ │ │ ├── bbq_lite_json.yaml │ │ │ ├── crash_blossom.yaml │ │ │ ├── cs_algorithms.yaml │ │ │ ├── dyck_languages.yaml │ │ │ ├── hhh_alignment.yaml │ │ │ ├── key_value_maps.yaml │ │ │ ├── known_unknowns.yaml │ │ │ ├── language_games.yaml │ │ │ ├── list_functions.yaml │ │ │ ├── misconceptions.yaml │ │ │ ├── novel_concepts.yaml │ │ │ ├── persian_idioms.yaml │ │ │ └── social_support.yaml │ │ └── multiple_choice │ │ │ ├── gem.yaml │ │ │ ├── color.yaml │ │ │ ├── tense.yaml │ │ │ ├── snarks.yaml │ │ │ ├── crass_ai.yaml │ │ │ ├── disfl_qa.yaml │ │ │ ├── kannada.yaml │ │ │ ├── multiemo.yaml │ │ │ ├── navigate.yaml │ │ │ ├── physics.yaml │ │ │ ├── rephrase.yaml │ │ │ ├── timedial.yaml │ │ │ ├── winowhy.yaml │ │ │ ├── arithmetic.yaml │ │ │ ├── codenames.yaml │ │ │ ├── cryptonite.yaml │ │ │ ├── hyperbaton.yaml │ │ │ ├── operators.yaml │ │ │ ├── ruin_names.yaml │ │ │ ├── social_iqa.yaml │ │ │ ├── strategyqa.yaml │ │ │ ├── emoji_movie.yaml │ │ │ ├── kanji_ascii.yaml │ │ │ ├── mnist_ascii.yaml │ │ │ ├── odd_one_out.yaml │ │ │ ├── parsinlu_qa.yaml │ │ │ ├── qa_wikidata.yaml │ │ │ ├── anachronisms.yaml │ │ │ ├── bbq_lite_json.yaml │ │ │ ├── crash_blossom.yaml │ │ │ └── cs_algorithms.yaml │ ├── cmmlu │ │ ├── cmmlu_default_arts.yaml │ │ ├── cmmlu_default_anatomy.yaml │ │ ├── cmmlu_default_logical.yaml │ │ ├── cmmlu_default_agronomy.yaml │ │ ├── cmmlu_default_genetics.yaml │ │ ├── cmmlu_default_virology.yaml │ │ ├── cmmlu_default_astronomy.yaml │ │ ├── cmmlu_default_economics.yaml │ │ ├── cmmlu_default_education.yaml │ │ ├── cmmlu_default_ethnology.yaml │ │ ├── cmmlu_default_journalism.yaml │ │ ├── cmmlu_default_management.yaml │ │ ├── cmmlu_default_marketing.yaml │ │ ├── cmmlu_default_nutrition.yaml │ │ ├── cmmlu_default_philosophy.yaml │ │ ├── cmmlu_default_sociology.yaml │ │ ├── cmmlu_default_college_law.yaml │ │ ├── cmmlu_default_food_science.yaml │ │ ├── cmmlu_default_global_facts.yaml │ │ ├── cmmlu_default_jurisprudence.yaml │ │ └── cmmlu_default_world_history.yaml │ ├── aexams │ │ ├── aexams_Science.yaml │ │ ├── aexams_Physics.yaml │ │ ├── aexams_Biology.yaml │ │ └── aexams_Social.yaml │ ├── ammlu │ │ ├── ammlu_virology.yaml │ │ ├── ammlu_management.yaml │ │ ├── ammlu_marketing.yaml │ │ ├── ammlu_nutrition.yaml │ │ ├── ammlu_global_facts.yaml │ │ ├── ammlu_human_aging.yaml │ │ ├── ammlu_sociology.yaml │ │ ├── ammlu_miscellaneous.yaml │ │ ├── ammlu_philosophy.yaml │ │ ├── ammlu_prehistory.yaml │ │ ├── ammlu_business_ethics.yaml │ │ ├── ammlu_econometrics.yaml │ │ ├── ammlu_formal_logic.yaml │ │ └── ammlu_jurisprudence.yaml │ ├── toxigen │ │ └── utils.py │ └── aclue │ │ ├── aclue_ancient_medical.yaml │ │ ├── aclue_ancient_phonetics.yaml │ │ ├── aclue_couplet_prediction.yaml │ │ └── aclue_poetry_appreciate.yaml ├── decontamination │ └── __init__.py └── __init__.py └── README.md /decompose/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lm_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lm_eval/tasks/nq_open/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lm_eval/decontamination/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lm_eval/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluator import evaluate, simple_evaluate 2 | -------------------------------------------------------------------------------- /lm_eval/tasks/squadv2/squadv2.yaml: -------------------------------------------------------------------------------- 1 | task: squadv2 2 | class: !function task.SQuAD2 3 | -------------------------------------------------------------------------------- /lm_eval/tasks/fld/fld_star.yaml: -------------------------------------------------------------------------------- 1 | include: fld_default.yaml 2 | task: fld_star 3 | dataset_name: star 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/headqa/headqa_es.yaml: -------------------------------------------------------------------------------- 1 | include: headqa_en.yaml 2 | task: headqa_es 3 | dataset_name: es 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_enron.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_enron 3 | dataset_name: pile_enron 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_uspto.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_uspto 3 | dataset_name: pile_uspto 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/mutual/multual_plus.yaml: -------------------------------------------------------------------------------- 1 | include: mutual.yaml 2 | task: mutual_plus 3 | dataset_name: mutual_plus 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_books3.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_books3 3 | dataset_name: pile_books3 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_github.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_github 3 | dataset_name: pile_github 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_qasper.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_qasper 3 | class: !function task.Qasper 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_qmsum.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_qmsum 3 | class: !function task.QMSum 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_en.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_en 3 | dataset_name: en 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_es.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_es 3 | dataset_name: es 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_eu.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_eu 3 | dataset_name: eu 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_hi.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_hi 3 | dataset_name: hi 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_id.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_id 3 | dataset_name: id 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_my.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_my 3 | dataset_name: my 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_ru.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_ru 3 | dataset_name: ru 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_sw.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_sw 3 | dataset_name: sw 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_te.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_te 3 | dataset_name: te 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xstorycloze/default_zh.yaml: -------------------------------------------------------------------------------- 1 | include: default_ar.yaml 2 | task: xstorycloze_zh 3 | dataset_name: zh 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/arc/arc_challenge.yaml: -------------------------------------------------------------------------------- 1 | include: arc_easy.yaml 2 | task: arc_challenge 3 | dataset_name: ARC-Challenge 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_gr.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "GR" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_gr" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_li.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "LI" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_li" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_wr.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "WR" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_wr" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: law 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_europarl.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_europarl 3 | dataset_name: pile_europarl 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_freelaw.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_freelaw 3 | dataset_name: pile_freelaw 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_pile-cc.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_pile-cc 3 | dataset_name: pile_pile-cc 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_quality.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_quality 3 | class: !function task.QuALITY 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_rch.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "RCH" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_rch" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_rcs.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "RCS" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_rcs" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/csatqa/csatqa_rcss.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "RCSS" 2 | "include": "_default_csatqa_yaml" 3 | "task": "csatqa_rcss" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: math 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_math 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_gutenberg.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_gutenberg 3 | dataset_name: pile_gutenberg 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_hackernews.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_hackernews 3 | dataset_name: pile_hackernews 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_philpapers.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_philpapers 3 | dataset_name: pile_philpapers 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_ubuntu-irc.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_ubuntu-irc 3 | dataset_name: pile_ubuntu-irc 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_wikipedia.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_wikipedia 3 | dataset_name: pile_wikipedia 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_govreport.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_govreport 3 | class: !function task.GovReport 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/glue/mnli/mismatch.yaml: -------------------------------------------------------------------------------- 1 | include: default.yaml 2 | task: mnli_mismatch 3 | validation_split: validation_mismatched 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/haerae/haerae_hi.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "history" 2 | "include": "_default_haerae_yaml" 3 | "task": "haerae_history" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Law 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Math 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_math 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: biology 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_biology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: ecology 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_ecology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: fashion 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_fashion 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: health 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_health 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: patent 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_patent 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_bookcorpus2.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_bookcorpus2 3 | dataset_name: pile_bookcorpus2 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_contractnli.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_contractnli 3 | class: !function task.ContractNLI 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_narrativeqa.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_narrativeqa 3 | class: !function task.NarrativeQA 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/french_bench/_default_template_yaml: -------------------------------------------------------------------------------- 1 | test_split: test 2 | fewshot_split: valid 3 | fewshot_config: 4 | sampler: first_n 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/haerae/haerae_lw.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "loan_words" 2 | "include": "_default_haerae_yaml" 3 | "task": "haerae_loan_word" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/haerae/haerae_rw.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "rare_words" 2 | "include": "_default_haerae_yaml" 3 | "task": "haerae_rare_word" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Health 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_health 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Patent 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_patent 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: taxation 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_taxation 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/lambada_multilingual/lambada_mt_de.yaml: -------------------------------------------------------------------------------- 1 | include: lambada_mt_en.yaml 2 | task: lambada_openai_mt_de 3 | dataset_name: de 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/lambada_multilingual/lambada_mt_es.yaml: -------------------------------------------------------------------------------- 1 | include: lambada_mt_en.yaml 2 | task: lambada_openai_mt_es 3 | dataset_name: es 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/lambada_multilingual/lambada_mt_fr.yaml: -------------------------------------------------------------------------------- 1 | include: lambada_mt_en.yaml 2 | task: lambada_openai_mt_fr 3 | dataset_name: fr 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/lambada_multilingual/lambada_mt_it.yaml: -------------------------------------------------------------------------------- 1 | include: lambada_mt_en.yaml 2 | task: lambada_openai_mt_it 3 | dataset_name: it 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_nih-exporter.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_nih-exporter 3 | dataset_name: pile_nih-exporter 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_opensubtitles.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_opensubtitles 3 | dataset_name: pile_opensubtitles 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_openwebtext2.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_openwebtext2 3 | dataset_name: pile_openwebtext2 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_stackexchange.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_stackexchange 3 | dataset_name: pile_stackexchange 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/scrolls/scrolls_summscreenfd.yaml: -------------------------------------------------------------------------------- 1 | group: scrolls 2 | task: scrolls_summscreenfd 3 | class: !function task.SummScreenFD 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml: -------------------------------------------------------------------------------- 1 | include: xnli_eu.yaml 2 | group: xnli_eu_mt_native 3 | task: xnli_eu_mt 4 | dataset_name: eu_mt 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french 3 | dataset_name: french 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Biology 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_biology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Ecology 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_ecology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Fashion 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_fashion 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: accounting 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_accounting 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: chemistry 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_chemistry 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: economics 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_economics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: education 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_education 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: geomatics 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_geomatics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: management 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: marketing 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_marketing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: psychology 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_psychology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_dm-mathematics.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_dm-mathematics 3 | dataset_name: pile_dm-mathematics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_pubmed-central.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_pubmed-central 3 | dataset_name: pile_pubmed-central 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/causative.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: causative 3 | include: _template_yaml 4 | task: blimp_causative 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/inchoative.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: inchoative 3 | include: _template_yaml 4 | task: blimp_inchoative 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/passive_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: passive_1 3 | include: _template_yaml 4 | task: blimp_passive_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/passive_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: passive_2 3 | include: _template_yaml 4 | task: blimp_passive_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/transitive.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: transitive 3 | include: _template_yaml 4 | task: blimp_transitive 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_island.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_island 3 | include: _template_yaml 4 | task: blimp_wh_island 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Chemistry 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_chemistry 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Economics 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_economics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Education 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_education 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Geomatics 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_geomatics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Marketing 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_marketing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Taxation 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_taxation 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: construction 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_construction 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: criminal_law 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_criminal_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: real_estate 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_real_estate 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_pubmed-abstracts.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_pubmed-abstracts 3 | dataset_name: pile_pubmed-abstracts 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/pile/pile_youtubesubtitles.yaml: -------------------------------------------------------------------------------- 1 | include: pile_arxiv.yaml 2 | task: pile_youtubesubtitles 3 | dataset_name: pile_youtubesubtitles 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/qa4mre/qa4mre_2012.yaml: -------------------------------------------------------------------------------- 1 | include: qa4mre_2011.yaml 2 | task: qa4mre_2012 3 | dataset_path: qa4mre 4 | dataset_name: 2012.main.EN 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/qa4mre/qa4mre_2013.yaml: -------------------------------------------------------------------------------- 1 | include: qa4mre_2011.yaml 2 | task: qa4mre_2013 3 | dataset_path: qa4mre 4 | dataset_name: 2013.main.EN 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_en.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: en 3 | include: xwinograd_common_yaml 4 | task: xwinograd_en 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_fr.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: fr 3 | include: xwinograd_common_yaml 4 | task: xwinograd_fr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_jp.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: jp 3 | include: xwinograd_common_yaml 4 | task: xwinograd_jp 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_pt.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: pt 3 | include: xwinograd_common_yaml 4 | task: xwinograd_pt 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_ru.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: ru 3 | include: xwinograd_common_yaml 4 | task: xwinograd_ru 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xwinograd/xwinograd_zh.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: zh 3 | include: xwinograd_common_yaml 4 | task: xwinograd_zh 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/intransitive.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: intransitive 3 | include: _template_yaml 4 | task: blimp_intransitive 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/haerae/haerae_gk.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "general_knowledge" 2 | "include": "_default_haerae_yaml" 3 | "task": "haerae_general_knowledge" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Accounting 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_accounting 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Management 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Psychology 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_psychology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Real-Estate 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_real_estate 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: law 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: math 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_math 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: public_safety 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_public_safety 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_geometry.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: geometry 3 | task: minerva_math_geometry 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_precalc.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: precalculus 3 | task: minerva_math_precalc 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu 2 | task: 3 | - mmlu_stem 4 | - mmlu_other 5 | - mmlu_social_sciences 6 | - mmlu_humanities 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_ht.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_ht 3 | dataset_name: ht 4 | doc_to_text: !function utils.doc_to_text_ht 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_id.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_id 3 | dataset_name: id 4 | doc_to_text: !function utils.doc_to_text_id 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_it.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_it 3 | dataset_name: it 4 | doc_to_text: !function utils.doc_to_text_it 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_qu.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_qu 3 | dataset_name: qu 4 | doc_to_text: !function utils.doc_to_text_qu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_sw.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_sw 3 | dataset_name: sw 4 | doc_to_text: !function utils.doc_to_text_sw 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_ta.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_ta 3 | dataset_name: ta 4 | doc_to_text: !function utils.doc_to_text_ta 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_th.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_th 3 | dataset_name: th 4 | doc_to_text: !function utils.doc_to_text_th 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_tr.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_tr 3 | dataset_name: tr 4 | doc_to_text: !function utils.doc_to_text_tr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_vi.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_vi 3 | dataset_name: vi 4 | doc_to_text: !function utils.doc_to_text_vi 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xcopa/default_zh.yaml: -------------------------------------------------------------------------------- 1 | include: default_et.yaml 2 | task: xcopa_zh 3 | dataset_name: zh 4 | doc_to_text: !function utils.doc_to_text_zh 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/anli/anli_r2.yaml: -------------------------------------------------------------------------------- 1 | include: anli_r1.yaml 2 | task: anli_r2 3 | training_split: train_r2 4 | validation_split: dev_r2 5 | test_split: test_r2 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/anli/anli_r3.yaml: -------------------------------------------------------------------------------- 1 | include: anli_r1.yaml 2 | task: anli_r3 3 | training_split: train_r3 4 | validation_split: dev_r3 5 | test_split: test_r3 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/drop_argument.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: drop_argument 3 | include: _template_yaml 4 | task: blimp_drop_argument 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/npi_present_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: npi_present_1 3 | include: _template_yaml 4 | task: blimp_npi_present_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/npi_present_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: npi_present_2 3 | include: _template_yaml 4 | task: blimp_npi_present_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Construction 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_construction 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Criminal-Law 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_criminal_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: health 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_health 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: patent 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_patent 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: food_processing 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_food_processing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: korean_history 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_korean_history 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: social_welfare 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_social_welfare 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: prealgebra 3 | task: minerva_math_prealgebra 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/adjunct_island.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: adjunct_island 3 | include: _template_yaml 4 | task: blimp_adjunct_island 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/only_npi_scope.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: only_npi_scope 3 | include: _template_yaml 4 | task: blimp_only_npi_scope 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/haerae/haerae_sn.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "standard_nomenclature" 2 | "include": "_default_haerae_yaml" 3 | "task": "haerae_standard_nomenclature" 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Korean-History 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_korean_history 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Public-Safety 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_public_safety 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Social-Welfare 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_social_welfare 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: biology 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_biology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: ecology 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_ecology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: fashion 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_fashion 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: civil_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_civil_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: computer_science 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_computer_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: energy_management 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_energy_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: number_theory 3 | task: minerva_math_num_theory 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ar.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ar 3 | include: _default_yaml 4 | task: m_mmlu_ar 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_bn.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: bn 3 | include: _default_yaml 4 | task: m_mmlu_bn 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ca.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ca 3 | include: _default_yaml 4 | task: m_mmlu_ca 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_da.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: da 3 | include: _default_yaml 4 | task: m_mmlu_da 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_de.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: de 3 | include: _default_yaml 4 | task: m_mmlu_de 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_en.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: en 3 | include: _default_yaml 4 | task: m_mmlu_en 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_es.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: es 3 | include: _default_yaml 4 | task: m_mmlu_es 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_eu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: eu 3 | include: _default_yaml 4 | task: m_mmlu_eu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_fr.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: fr 3 | include: _default_yaml 4 | task: m_mmlu_fr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_gu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gu 3 | include: _default_yaml 4 | task: m_mmlu_gu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hi.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: hi 3 | include: _default_yaml 4 | task: m_mmlu_hi 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hr.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: hr 3 | include: _default_yaml 4 | task: m_mmlu_hr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: hu 3 | include: _default_yaml 4 | task: m_mmlu_hu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hy.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: hy 3 | include: _default_yaml 4 | task: m_mmlu_hy 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_id.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: id 3 | include: _default_yaml 4 | task: m_mmlu_id 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_is.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: is 3 | include: _default_yaml 4 | task: m_mmlu_is 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_it.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: it 3 | include: _default_yaml 4 | task: m_mmlu_it 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_kn.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: kn 3 | include: _default_yaml 4 | task: m_mmlu_kn 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ml.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ml 3 | include: _default_yaml 4 | task: m_mmlu_ml 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_mr.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: mr 3 | include: _default_yaml 4 | task: m_mmlu_mr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nb.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: nb 3 | include: _default_yaml 4 | task: m_mmlu_nb 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ne.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ne 3 | include: _default_yaml 4 | task: m_mmlu_ne 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nl.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: nl 3 | include: _default_yaml 4 | task: m_mmlu_nl 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_pt.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: pt 3 | include: _default_yaml 4 | task: m_mmlu_pt 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ro.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ro 3 | include: _default_yaml 4 | task: m_mmlu_ro 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ru.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ru 3 | include: _default_yaml 4 | task: m_mmlu_ru 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sk.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: sk 3 | include: _default_yaml 4 | task: m_mmlu_sk 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sr.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: sr 3 | include: _default_yaml 4 | task: m_mmlu_sr 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sv.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: sv 3 | include: _default_yaml 4 | task: m_mmlu_sv 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ta.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ta 3 | include: _default_yaml 4 | task: m_mmlu_ta 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_te.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: te 3 | include: _default_yaml 4 | task: m_mmlu_te 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_uk.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: uk 3 | include: _default_yaml 4 | task: m_mmlu_uk 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_vi.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: vi 3 | include: _default_yaml 4 | task: m_mmlu_vi 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_zh.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: zh 3 | include: _default_yaml 4 | task: m_mmlu_zh 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EigenAttn 2 | 3 | Official repository for "Eigen Attention : Attention in Low-Rank Space for KV Cache Compression". 4 | 5 | Code to be added soon... 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/complex_NP_island.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: complex_NP_island 3 | include: _template_yaml 4 | task: blimp_complex_NP_island 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: ellipsis_n_bar_1 3 | include: _template_yaml 4 | task: blimp_ellipsis_n_bar_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: ellipsis_n_bar_2 3 | include: _template_yaml 4 | task: blimp_ellipsis_n_bar_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_vs_that_no_gap 3 | include: _template_yaml 4 | task: blimp_wh_vs_that_no_gap 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Computer-Science 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_computer_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Food-Processing 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_food_processing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: chemistry 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_chemistry 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: economics 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_economics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: education 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_education 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: geomatics 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_geomatics 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: marketing 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_marketing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: taxation 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_taxation 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/polemo2/polemo2_out.yaml: -------------------------------------------------------------------------------- 1 | include: polemo2_in.yaml 2 | task: polemo2_out 3 | dataset_path: allegro/klej-polemo2-out 4 | dataset_name: klej-polemo2-out 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_case_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_case_1 3 | include: _template_yaml 4 | task: blimp_principle_A_case_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_case_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_case_2 3 | include: _template_yaml 4 | task: blimp_principle_A_case_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/tough_vs_raising_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: tough_vs_raising_1 3 | include: _template_yaml 4 | task: blimp_tough_vs_raising_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/tough_vs_raising_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: tough_vs_raising_2 3 | include: _template_yaml 4 | task: blimp_tough_vs_raising_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_ejauxiliar.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_ejauxiliar 3 | include: eus_exams_es 4 | task: eus_exams_es_ejauxiliar 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_ejtecnico.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_ejtecnico 3 | include: eus_exams_es 4 | task: eus_exams_es_ejtecnico 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opebilbao 3 | include: eus_exams_es 4 | task: eus_exams_es_opebilbao 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehuaux.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehuaux 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehuaux 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntza.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_ejlaguntza 3 | include: eus_exams_eu 4 | task: eus_exams_eu_ejlaguntza 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_main_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Civil-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_civil_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Energy-Management 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_energy_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: accounting 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_accounting 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: management 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: psychology 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_psychology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: real_estate 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_real_estate 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: chemical_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_chemical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: industrial_engineer 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_industrial_engineer 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: maritime_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_maritime_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_acm_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "acm_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_acm_Arab" 4 | "test_split": "acm_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_afr_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "afr_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_afr_Latn" 4 | "test_split": "afr_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_als_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "als_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_als_Latn" 4 | "test_split": "als_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_amh_Ethi.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "amh_Ethi" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_amh_Ethi" 4 | "test_split": "amh_Ethi" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_apc_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "apc_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_apc_Arab" 4 | "test_split": "apc_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_arb_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "arb_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_arb_Arab" 4 | "test_split": "arb_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_arb_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "arb_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_arb_Latn" 4 | "test_split": "arb_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ars_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ars_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ars_Arab" 4 | "test_split": "ars_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ary_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ary_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ary_Arab" 4 | "test_split": "ary_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_arz_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "arz_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_arz_Arab" 4 | "test_split": "arz_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_asm_Beng.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "asm_Beng" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_asm_Beng" 4 | "test_split": "asm_Beng" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_azj_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "azj_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_azj_Latn" 4 | "test_split": "azj_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_bam_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "bam_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_bam_Latn" 4 | "test_split": "bam_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ben_Beng.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ben_Beng" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ben_Beng" 4 | "test_split": "ben_Beng" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ben_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ben_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ben_Latn" 4 | "test_split": "ben_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_bod_Tibt.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "bod_Tibt" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_bod_Tibt" 4 | "test_split": "bod_Tibt" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "bul_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_bul_Cyrl" 4 | "test_split": "bul_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_cat_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "cat_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_cat_Latn" 4 | "test_split": "cat_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ceb_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ceb_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ceb_Latn" 4 | "test_split": "ceb_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ces_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ces_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ces_Latn" 4 | "test_split": "ces_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ckb_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ckb_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ckb_Arab" 4 | "test_split": "ckb_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_dan_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "dan_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_dan_Latn" 4 | "test_split": "dan_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_deu_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "deu_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_deu_Latn" 4 | "test_split": "deu_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ell_Grek.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ell_Grek" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ell_Grek" 4 | "test_split": "ell_Grek" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_eng_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "eng_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_eng_Latn" 4 | "test_split": "eng_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_est_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "est_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_est_Latn" 4 | "test_split": "est_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_eus_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "eus_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_eus_Latn" 4 | "test_split": "eus_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_fin_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "fin_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_fin_Latn" 4 | "test_split": "fin_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_fra_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "fra_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_fra_Latn" 4 | "test_split": "fra_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_fuv_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "fuv_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_fuv_Latn" 4 | "test_split": "fuv_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_gaz_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "gaz_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_gaz_Latn" 4 | "test_split": "gaz_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_grn_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "grn_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_grn_Latn" 4 | "test_split": "grn_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_guj_Gujr.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "guj_Gujr" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_guj_Gujr" 4 | "test_split": "guj_Gujr" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hat_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hat_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hat_Latn" 4 | "test_split": "hat_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hau_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hau_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hau_Latn" 4 | "test_split": "hau_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_heb_Hebr.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "heb_Hebr" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_heb_Hebr" 4 | "test_split": "heb_Hebr" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hin_Deva.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hin_Deva" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hin_Deva" 4 | "test_split": "hin_Deva" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hin_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hin_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hin_Latn" 4 | "test_split": "hin_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hrv_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hrv_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hrv_Latn" 4 | "test_split": "hrv_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hun_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hun_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hun_Latn" 4 | "test_split": "hun_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_hye_Armn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "hye_Armn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_hye_Armn" 4 | "test_split": "hye_Armn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ibo_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ibo_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ibo_Latn" 4 | "test_split": "ibo_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ilo_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ilo_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ilo_Latn" 4 | "test_split": "ilo_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ind_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ind_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ind_Latn" 4 | "test_split": "ind_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_isl_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "isl_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_isl_Latn" 4 | "test_split": "isl_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ita_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ita_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ita_Latn" 4 | "test_split": "ita_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_jav_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "jav_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_jav_Latn" 4 | "test_split": "jav_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_jpn_Jpan.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "jpn_Jpan" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_jpn_Jpan" 4 | "test_split": "jpn_Jpan" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kac_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kac_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kac_Latn" 4 | "test_split": "kac_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kan_Knda.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kan_Knda" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kan_Knda" 4 | "test_split": "kan_Knda" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kat_Geor.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kat_Geor" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kat_Geor" 4 | "test_split": "kat_Geor" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kaz_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kaz_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kaz_Cyrl" 4 | "test_split": "kaz_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kea_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kea_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kea_Latn" 4 | "test_split": "kea_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_khk_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "khk_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_khk_Cyrl" 4 | "test_split": "khk_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_khm_Khmr.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "khm_Khmr" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_khm_Khmr" 4 | "test_split": "khm_Khmr" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kin_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kin_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kin_Latn" 4 | "test_split": "kin_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kir_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kir_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kir_Cyrl" 4 | "test_split": "kir_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_kor_Hang.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "kor_Hang" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_kor_Hang" 4 | "test_split": "kor_Hang" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_lao_Laoo.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "lao_Laoo" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_lao_Laoo" 4 | "test_split": "lao_Laoo" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_lin_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "lin_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_lin_Latn" 4 | "test_split": "lin_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_lit_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "lit_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_lit_Latn" 4 | "test_split": "lit_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_lug_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "lug_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_lug_Latn" 4 | "test_split": "lug_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_luo_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "luo_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_luo_Latn" 4 | "test_split": "luo_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_lvs_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "lvs_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_lvs_Latn" 4 | "test_split": "lvs_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mal_Mlym.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mal_Mlym" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mal_Mlym" 4 | "test_split": "mal_Mlym" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mar_Deva.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mar_Deva" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mar_Deva" 4 | "test_split": "mar_Deva" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mkd_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mkd_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mkd_Cyrl" 4 | "test_split": "mkd_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mlt_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mlt_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mlt_Latn" 4 | "test_split": "mlt_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mri_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mri_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mri_Latn" 4 | "test_split": "mri_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_mya_Mymr.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "mya_Mymr" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_mya_Mymr" 4 | "test_split": "mya_Mymr" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_nld_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "nld_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_nld_Latn" 4 | "test_split": "nld_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_nob_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "nob_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_nob_Latn" 4 | "test_split": "nob_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_npi_Deva.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "npi_Deva" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_npi_Deva" 4 | "test_split": "npi_Deva" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_npi_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "npi_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_npi_Latn" 4 | "test_split": "npi_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_nso_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "nso_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_nso_Latn" 4 | "test_split": "nso_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_nya_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "nya_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_nya_Latn" 4 | "test_split": "nya_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ory_Orya.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ory_Orya" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ory_Orya" 4 | "test_split": "ory_Orya" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_pan_Guru.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "pan_Guru" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_pan_Guru" 4 | "test_split": "pan_Guru" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_pbt_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "pbt_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_pbt_Arab" 4 | "test_split": "pbt_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_pes_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "pes_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_pes_Arab" 4 | "test_split": "pes_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_plt_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "plt_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_plt_Latn" 4 | "test_split": "plt_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_pol_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "pol_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_pol_Latn" 4 | "test_split": "pol_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_por_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "por_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_por_Latn" 4 | "test_split": "por_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ron_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ron_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ron_Latn" 4 | "test_split": "ron_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_rus_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "rus_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_rus_Cyrl" 4 | "test_split": "rus_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_shn_Mymr.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "shn_Mymr" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_shn_Mymr" 4 | "test_split": "shn_Mymr" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_sin_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "sin_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_sin_Latn" 4 | "test_split": "sin_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_sin_Sinh.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "sin_Sinh" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_sin_Sinh" 4 | "test_split": "sin_Sinh" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_slk_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "slk_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_slk_Latn" 4 | "test_split": "slk_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_slv_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "slv_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_slv_Latn" 4 | "test_split": "slv_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_sna_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "sna_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_sna_Latn" 4 | "test_split": "sna_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_snd_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "snd_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_snd_Arab" 4 | "test_split": "snd_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_som_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "som_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_som_Latn" 4 | "test_split": "som_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_sot_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "sot_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_sot_Latn" 4 | "test_split": "sot_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_spa_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "spa_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_spa_Latn" 4 | "test_split": "spa_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_srp_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "srp_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_srp_Cyrl" 4 | "test_split": "srp_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ssw_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ssw_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ssw_Latn" 4 | "test_split": "ssw_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_sun_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "sun_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_sun_Latn" 4 | "test_split": "sun_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_swe_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "swe_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_swe_Latn" 4 | "test_split": "swe_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_swh_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "swh_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_swh_Latn" 4 | "test_split": "swh_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tam_Taml.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tam_Taml" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tam_Taml" 4 | "test_split": "tam_Taml" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tel_Telu.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tel_Telu" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tel_Telu" 4 | "test_split": "tel_Telu" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tgk_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tgk_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tgk_Cyrl" 4 | "test_split": "tgk_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tgl_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tgl_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tgl_Latn" 4 | "test_split": "tgl_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tha_Thai.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tha_Thai" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tha_Thai" 4 | "test_split": "tha_Thai" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tir_Ethi.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tir_Ethi" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tir_Ethi" 4 | "test_split": "tir_Ethi" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tsn_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tsn_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tsn_Latn" 4 | "test_split": "tsn_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tso_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tso_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tso_Latn" 4 | "test_split": "tso_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_tur_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "tur_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_tur_Latn" 4 | "test_split": "tur_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_ukr_Cyrl.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "ukr_Cyrl" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_ukr_Cyrl" 4 | "test_split": "ukr_Cyrl" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_urd_Arab.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "urd_Arab" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_urd_Arab" 4 | "test_split": "urd_Arab" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_urd_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "urd_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_urd_Latn" 4 | "test_split": "urd_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_uzn_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "uzn_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_uzn_Latn" 4 | "test_split": "uzn_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_vie_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "vie_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_vie_Latn" 4 | "test_split": "vie_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_war_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "war_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_war_Latn" 4 | "test_split": "war_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_wol_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "wol_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_wol_Latn" 4 | "test_split": "wol_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_xho_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "xho_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_xho_Latn" 4 | "test_split": "xho_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_yor_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "yor_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_yor_Latn" 4 | "test_split": "yor_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_zho_Hans.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "zho_Hans" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_zho_Hans" 4 | "test_split": "zho_Hans" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_zho_Hant.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "zho_Hant" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_zho_Hant" 4 | "test_split": "zho_Hant" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_zsm_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "zsm_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_zsm_Latn" 4 | "test_split": "zsm_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/belebele/belebele_zul_Latn.yaml: -------------------------------------------------------------------------------- 1 | "fewshot_split": "zul_Latn" 2 | "include": "_default_template_yaml" 3 | "task": "belebele_zul_Latn" 4 | "test_split": "zul_Latn" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_domain_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_domain_1 3 | include: _template_yaml 4 | task: blimp_principle_A_domain_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_domain_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_domain_2 3 | include: _template_yaml 4 | task: blimp_principle_A_domain_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_domain_3.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_domain_3 3 | include: _template_yaml 4 | task: blimp_principle_A_domain_3 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_vs_that_with_gap 3 | include: _template_yaml 4 | task: blimp_wh_vs_that_with_gap 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_ejsubalterno.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_ejsubalterno 3 | include: eus_exams_es 4 | task: eus_exams_es_ejsubalterno 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehuadmin.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehuadmin 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehuadmin 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehubiblio.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehubiblio 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehubiblio 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakiaux.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakiaux 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakiaux 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakienf.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakienf 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakienf 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza1c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza1c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza1c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza2c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza2c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza2c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza3c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza3c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza3c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza4c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza4c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza4c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza5c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza5c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza5c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza6c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza6c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza6c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza7c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza7c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza7c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza8c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza8c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza8c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_osakidetza9c.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_osakidetza9c 3 | include: eus_exams_es 4 | task: eus_exams_es_osakidetza9c 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_ejteknikari.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_ejteknikari 3 | include: eus_exams_eu 4 | task: eus_exams_eu_ejteknikari 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opebilbaoeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opebilbaoeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opebilbaoeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehuauxeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehuauxeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehuauxeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza1e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza1e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza1e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza2e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza2e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza2e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza3e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza3e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza3e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza5e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza5e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza5e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza6e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza6e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza6e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza7e.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_osakidetza7e 3 | include: eus_exams_eu 4 | task: eus_exams_eu_osakidetza7e 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Industrial-Engineer 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_industrial_engineer 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: construction 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_construction 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: criminal_law 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_criminal_law 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: agricultural_sciences 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_agricultural_sciences 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: electrical_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_electrical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: environmental_science 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_environmental_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: information_technology 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_information_technology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: materials_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_materials_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: mechanical_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_mechanical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: nondestructive_testing 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_nondestructive_testing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/openness.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: openness 3 | include: _template_yaml 4 | task: persona_openness 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/jec-qa-ca.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_jec_qa_ca 6 | dataset_path: hails/agieval-jec-qa-ca 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/jec-qa-kd.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_jec_qa_kd 6 | dataset_path: hails/agieval-jec-qa-kd 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/logiqa-zh.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_logiqa_zh 6 | dataset_path: hails/agieval-logiqa-zh 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/animate_subject_trans.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: animate_subject_trans 3 | include: _template_yaml 4 | task: blimp_animate_subject_trans 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_c_command.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_c_command 3 | include: _template_yaml 4 | task: blimp_principle_A_c_command 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehuderecho.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehuderecho 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehuderecho 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnico.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehutecnico 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehutecnico 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakiadmin.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakiadmin 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakiadmin 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntzaile.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_ejlaguntzaile 3 | include: eus_exams_eu 4 | task: eus_exams_eu_ejlaguntzaile 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehuadmineu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehuadmineu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehuadmineu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakiauxeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakiauxeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakienfeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakienfeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakienfeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_diamond_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_main_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Agricultural-Sciences 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_agricultural_sciences 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Chemical-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_chemical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Environmental-Science 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_environmental_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Maritime-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_maritime_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Materials-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_materials_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: korean_history 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_korean_history 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: public_safety 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_public_safety 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: social_welfare 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_social_welfare 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: electronics_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_electronics_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: refrigerating_machinery 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_refrigerating_machinery 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/narcissism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: narcissism 3 | include: _template_yaml 4 | task: persona_narcissism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/tmmluplus/default/tmmluplus.yaml: -------------------------------------------------------------------------------- 1 | group: tmmluplus 2 | task: 3 | - tmmluplus_other 4 | - tmmluplus_social_sciences 5 | - tmmluplus_humanities 6 | - tmmluplus_STEM 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_2da.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_2da 3 | dataset_name: arithmetic_2da 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_2dm.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_2dm 3 | dataset_name: arithmetic_2dm 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_2ds.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_2ds 3 | dataset_name: arithmetic_2ds 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_3da.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_3da 3 | dataset_name: arithmetic_3da 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_3ds.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_3ds 3 | dataset_name: arithmetic_3ds 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_4da.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_4da 3 | dataset_name: arithmetic_4da 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_4ds.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_4ds 3 | dataset_name: arithmetic_4ds 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_5da.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_5da 3 | dataset_name: arithmetic_5da 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/arithmetic/arithmetic_5ds.yaml: -------------------------------------------------------------------------------- 1 | include: arithmetic_1dc.yaml 2 | task: arithmetic_5ds 3 | dataset_name: arithmetic_5ds 4 | dataset_kwargs: 5 | trust_remote_code: true 6 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/animate_subject_passive.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: animate_subject_passive 3 | include: _template_yaml 4 | task: blimp_animate_subject_passive 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_questions_object_gap.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_questions_object_gap 3 | include: _template_yaml 4 | task: blimp_wh_questions_object_gap 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "law" 2 | "description": "以下是中国关于法学的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_law" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnicob.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehutecnicob 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehutecnicob 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakiauxenf.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakiauxenf 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakiauxenf 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakicelador.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakicelador 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakicelador 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakitecnico.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakitecnico 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakitecnico 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakivarios.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakivarios 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakivarios 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_ejadministrari.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_ejadministrari 3 | include: eus_exams_eu 4 | task: eus_exams_eu_ejadministrari 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehubiblioeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehubiblioeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehubiblioeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehuderechoeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehuderechoeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehuderechoeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehutecnicoeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehutecnicoeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehutecnicoeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiadmineu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakiadmineu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakiadmineu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_extended_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Electrical-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_electrical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Information-Technology 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_information_technology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Mechanical-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_mechanical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Nondestructive-Testing 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_nondestructive_testing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: computer_science 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_computer_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: food_processing 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_food_processing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: counting_and_probability 3 | task: minerva_math_counting_and_prob 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml: -------------------------------------------------------------------------------- 1 | include: minerva_math_algebra.yaml 2 | dataset_name: intermediate_algebra 3 | task: minerva_math_intermediate_algebra 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/neuroticism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: neuroticism 3 | include: _template_yaml 4 | task: persona_neuroticism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/psychopathy.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: psychopathy 3 | include: _template_yaml 4 | task: persona_psychopathy 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/risk-averse.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: risk-averse 3 | include: _template_yaml 4 | task: persona_risk-averse 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-mathqa.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_mathqa 6 | dataset_path: hails/agieval-gaokao-mathqa 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/sat-en.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_sat_en 7 | dataset_path: hails/agieval-sat-en 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/gem.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: gem_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_gem_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/gem.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: gem_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_gem_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/anaphor_gender_agreement.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: anaphor_gender_agreement 3 | include: _template_yaml 4 | task: blimp_anaphor_gender_agreement 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/anaphor_number_agreement.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: anaphor_number_agreement 3 | include: _template_yaml 4 | task: blimp_anaphor_number_agreement 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/only_npi_licensor_present.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: only_npi_licensor_present 3 | include: _template_yaml 4 | task: blimp_only_npi_licensor_present 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/sentential_subject_island.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: sentential_subject_island 3 | include: _template_yaml 4 | task: blimp_sentential_subject_island 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/superlative_quantifiers_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: superlative_quantifiers_1 3 | include: _template_yaml 4 | task: blimp_superlative_quantifiers_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/superlative_quantifiers_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: superlative_quantifiers_2 3 | include: _template_yaml 4 | task: blimp_superlative_quantifiers_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_questions_subject_gap.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_questions_subject_gap 3 | include: _template_yaml 4 | task: blimp_wh_questions_subject_gap 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logic" 2 | "description": "以下是中国关于逻辑学的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_logic" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_arts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "arts" 2 | "description": "以下是关于艺术学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_arts" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_ejadministrativo.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_ejadministrativo 3 | include: eus_exams_es 4 | task: eus_exams_es_ejadministrativo 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehueconomicas.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehueconomicas 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehueconomicas 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehusubalterno.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehusubalterno 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehusubalterno 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakijuridico.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakijuridico 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakijuridico 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeosakioperario.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeosakioperario 3 | include: eus_exams_es 4 | task: eus_exams_es_opeosakioperario 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehuteknikarib.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehuteknikarib 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehuteknikarib 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opegasteizkoudala.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opegasteizkoudala 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opegasteizkoudala 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxenfeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakiauxenfeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakiauxenfeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiceladoreu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakiceladoreu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakiceladoreu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakitecnicoeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakitecnicoeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakitecnicoeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakivarioseu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakivarioseu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakivarioseu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_main_cot_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_diamond_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_extended_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Electronics-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_electronics_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Refrigerating-Machinery 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_refrigerating_machinery 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: civil_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_civil_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: energy_management 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_energy_management 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/agreeableness.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: agreeableness 3 | include: _template_yaml 4 | task: persona_agreeableness 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/extraversion.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: extraversion 3 | include: _template_yaml 4 | task: persona_extraversion 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/no-shut-down.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: no-shut-down 3 | include: _template_yaml 4 | task: persona_no-shut-down 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/risk-neutral.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: risk-neutral 3 | include: _template_yaml 4 | task: persona_risk-neutral 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/risk-seeking.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: risk-seeking 3 | include: _template_yaml 4 | task: persona_risk-seeking 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-biology.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_biology 6 | dataset_path: hails/agieval-gaokao-biology 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-chinese.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_chinese 6 | dataset_path: hails/agieval-gaokao-chinese 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-history.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_history 6 | dataset_path: hails/agieval-gaokao-history 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-physics.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_physics 6 | dataset_path: hails/agieval-gaokao-physics 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/lsat-ar.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_lsat_ar 7 | dataset_path: hails/agieval-lsat-ar 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/lsat-lr.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_lsat_lr 7 | dataset_path: hails/agieval-lsat-lr 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/lsat-rc.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_lsat_rc 7 | dataset_path: hails/agieval-lsat-rc 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/sat-math.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_sat_math 7 | dataset_path: hails/agieval-sat-math 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/color.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: color_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_color_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/snarks.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: snarks_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_snarks_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/tense.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: tense_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_tense_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/color.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: color_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_color_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/tense.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: tense_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_tense_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: determiner_noun_agreement_1 3 | include: _template_yaml 4 | task: blimp_determiner_noun_agreement_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: determiner_noun_agreement_2 3 | include: _template_yaml 4 | task: blimp_determiner_noun_agreement_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/expletive_it_object_raising.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: expletive_it_object_raising 3 | include: _template_yaml 4 | task: blimp_expletive_it_object_raising 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/principle_A_reconstruction.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: principle_A_reconstruction 3 | include: _template_yaml 4 | task: blimp_principle_A_reconstruction 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "以下是关于解剖学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_anatomy" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_logical.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical" 2 | "description": "以下是关于逻辑学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_logical" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_age.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_age 3 | dataset_name: english 4 | process_docs: !function utils.filter_age 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_age.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_age 3 | dataset_name: french 4 | process_docs: !function utils.filter_age 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehueconomicaseu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehueconomicaseu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehueconomicaseu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehusubalternoeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehusubalternoeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehusubalternoeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeosakioperarioeu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeosakioperarioeu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeosakioperarioeu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_diamond_cot_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_main_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: industrial_engineer 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_industrial_engineer 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/has-disability.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: has-disability 3 | include: _template_yaml 4 | task: persona_has-disability 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/interest-in-art.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: interest-in-art 3 | include: _template_yaml 4 | task: persona_interest-in-art 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/no-goal-change.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: no-goal-change 3 | include: _template_yaml 4 | task: persona_no-goal-change 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aexams/aexams_Science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "Science" 2 | "description": "قم بالإجابة على مايلي في مجال العلوم \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aexams_Science" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-chemistry.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_chemistry 6 | dataset_path: hails/agieval-gaokao-chemistry 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/gaokao-geography.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_cn 5 | task: agieval_gaokao_geography 6 | dataset_path: hails/agieval-gaokao-geography 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/agieval/logiqa-en.yaml: -------------------------------------------------------------------------------- 1 | include: aqua-rat.yaml 2 | group: 3 | - agieval 4 | - agieval_nous 5 | - agieval_en 6 | task: agieval_logiqa_en 7 | dataset_path: hails/agieval-logiqa-en 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_virology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/kannada.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: kannada_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_kannada_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/physics.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: physics_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_physics_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/winowhy.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: winowhy_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_winowhy_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/snarks.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: snarks_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_snarks_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_agronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "agronomy" 2 | "description": "以下是关于农学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_agronomy" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "genetics" 2 | "description": "以下是关于遗传学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_genetics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "以下是关于病毒学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_virology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_autre.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_autre 3 | dataset_name: english 4 | process_docs: !function utils.filter_autre 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_autre.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_autre 3 | dataset_name: french 4 | process_docs: !function utils.filter_autre 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_gender.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_gender 3 | dataset_name: french 4 | process_docs: !function utils.filter_gender 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeehuempresariales.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeehuempresariales 3 | include: eus_exams_es 4 | task: eus_exams_es_opeehuempresariales 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_extended_cot_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: agricultural_sciences 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_agricultural_sciences 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: chemical_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_chemical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: environmental_science 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_environmental_science 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: maritime_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_maritime_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: materials_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_materials_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: gas_technology_and_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_gas_technology_and_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/anti-immigration.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: anti-immigration 3 | include: _template_yaml 4 | task: persona_anti-immigration 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/interest-in-math.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: interest-in-math 3 | include: _template_yaml 4 | task: persona_interest-in-math 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: machiavellianism 3 | include: _template_yaml 4 | task: persona_machiavellianism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/self-replication.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: self-replication 3 | include: _template_yaml 4 | task: persona_self-replication 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/xnli_eu/xnli_eu_native.yaml: -------------------------------------------------------------------------------- 1 | include: xnli_eu.yaml 2 | group: xnli_eu_mt_native 3 | task: xnli_eu_native 4 | training_split: null 5 | validation_split: null 6 | dataset_name: eu_native 7 | -------------------------------------------------------------------------------- /lm_eval/tasks/aexams/aexams_Physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "Physics" 2 | "description": "قم بالإجابة على مايلي في مجال الفيزياء \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aexams_Physics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_management" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_marketing" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_nutrition" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/codenames.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: codenames_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_codenames_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/crass_ai.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: crass_ai_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_crass_ai_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/disfl_qa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: disfl_qa_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_disfl_qa_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/multiemo.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: multiemo_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_multiemo_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/navigate.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: navigate_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_navigate_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/operators.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: operators_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_operators_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/rephrase.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: rephrase_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_rephrase_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/timedial.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: timedial_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_timedial_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: crass_ai_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_crass_ai_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: disfl_qa_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_disfl_qa_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/kannada.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: kannada_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_kannada_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: multiemo_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_multiemo_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/navigate.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: navigate_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_navigate_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/physics.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: physics_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_physics_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: rephrase_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_rephrase_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/timedial.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: timedial_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_timedial_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: winowhy_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_winowhy_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: sentential_negation_npi_scope 3 | include: _template_yaml 4 | task: blimp_sentential_negation_npi_scope 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_marxism.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marxism" 2 | "description": "以下是中国关于马克思主义基本原理的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_marxism" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_physician.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "physician" 2 | "description": "以下是中国关于医师资格的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_physician" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "以下是关于天文学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_astronomy" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "economics" 2 | "description": "以下是关于经济学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_economics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_education.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "education" 2 | "description": "以下是关于教育学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_education" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_ethnology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "ethnology" 2 | "description": "以下是关于民族学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_ethnology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_journalism.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "journalism" 2 | "description": "以下是关于新闻学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_journalism" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "以下是关于管理学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_management" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "以下是关于市场营销的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_marketing" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "以下是关于营养学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_nutrition" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "以下是关于哲学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_philosophy" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "以下是关于社会学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_sociology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_gender.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_gender 3 | dataset_name: english 4 | process_docs: !function utils.filter_gender 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_es_opeayuntamientovitoria.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: es_opeayuntamientovitoria 3 | include: eus_exams_es 4 | task: eus_exams_es_opeayuntamientovitoria 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/eus_exams/eus_exams_eu_opeehuempresarialeseu.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: eu_opeehuempresarialeseu 3 | include: eus_exams_eu 4 | task: eus_exams_eu_opeehuempresarialeseu 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_diamond_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: electrical_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_electrical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: information_technology 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_information_technology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: mechanical_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_mechanical_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: nondestructive_testing 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_nondestructive_testing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: interior_architecture_and_design 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_interior_architecture_and_design 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: machine_design_and_manufacturing 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_machine_design_and_manufacturing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: political_science_and_sociology 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_political_science_and_sociology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/anti-LGBTQ-rights.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: anti-LGBTQ-rights 3 | include: _template_yaml 4 | task: persona_anti-LGBTQ-rights 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/conscientiousness.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: conscientiousness 3 | include: _template_yaml 4 | task: persona_conscientiousness 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/ends-justify-means.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: ends-justify-means 3 | include: _template_yaml 4 | task: persona_ends-justify-means 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/high-discount-rate.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: high-discount-rate 3 | include: _template_yaml 4 | task: persona_high-discount-rate 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/interest-in-music.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: interest-in-music 3 | include: _template_yaml 4 | task: persona_interest-in-music 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: interest-in-sports 3 | include: _template_yaml 4 | task: persona_interest-in-sports 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/low-discount-rate.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: low-discount-rate 3 | include: _template_yaml 4 | task: persona_low-discount-rate 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/stands-its-ground.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: stands-its-ground 3 | include: _template_yaml 4 | task: persona_stands-its-ground 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aexams/aexams_Biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "Biology" 2 | "description": "قم بالإجابة على مايلي في مجال العلوم الحيوية\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aexams_Biology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aexams/aexams_Social.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "Social" 2 | "description": "قم بالإجابة على مايلي في مجال العلوم الإجتماعية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aexams_Social" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_global_facts" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_human_aging" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "فم بعملية التقييم في مجال العلوم الإجتماعية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_sociology" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/arithmetic.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: arithmetic_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_arithmetic_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/cryptonite.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: cryptonite_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_cryptonite_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/emoji_movie.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: emoji_movie_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_emoji_movie_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/hyperbaton.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: hyperbaton_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_hyperbaton_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/kanji_ascii.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: kanji_ascii_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_kanji_ascii_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/mnist_ascii.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: mnist_ascii_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_mnist_ascii_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/odd_one_out.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: odd_one_out_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_odd_one_out_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/parsinlu_qa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: parsinlu_qa_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_parsinlu_qa_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/qa_wikidata.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: qa_wikidata_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_qa_wikidata_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/ruin_names.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: ruin_names_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_ruin_names_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/social_iqa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: social_iqa_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_social_iqa_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/strategyqa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: strategyqa_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_strategyqa_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: arithmetic_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_arithmetic_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/codenames.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: codenames_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_codenames_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: cryptonite_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_cryptonite_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: hyperbaton_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_hyperbaton_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/operators.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: operators_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_operators_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: ruin_names_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_ruin_names_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: social_iqa_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_social_iqa_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: strategyqa_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_strategyqa_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/existential_there_object_raising.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: existential_there_object_raising 3 | include: _template_yaml 4 | task: blimp_existential_there_object_raising 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: existential_there_quantifiers_1 3 | include: _template_yaml 4 | task: blimp_existential_there_quantifiers_1 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: existential_there_quantifiers_2 3 | include: _template_yaml 4 | task: blimp_existential_there_quantifiers_2 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: irregular_past_participle_verbs 3 | include: _template_yaml 4 | task: blimp_irregular_past_participle_verbs 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/left_branch_island_echo_question.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: left_branch_island_echo_question 3 | include: _template_yaml 4 | task: blimp_left_branch_island_echo_question 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_vs_that_no_gap_long_distance 3 | include: _template_yaml 4 | task: blimp_wh_vs_that_no_gap_long_distance 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_accountant.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "accountant" 2 | "description": "以下是中国关于注册会计师的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_accountant" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_art_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "art_studies" 2 | "description": "以下是中国关于艺术学的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_art_studies" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_college_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_law" 2 | "description": "以下是关于大学法律的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_college_law" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_religion.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_religion 3 | dataset_name: english 4 | process_docs: !function utils.filter_religion 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_religion.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_religion 3 | dataset_name: french 4 | process_docs: !function utils.filter_religion 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_extended_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_main_generative_n_shot 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Gas-Technology-and-Engineering 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_gas_technology_and_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Political-Science-and-Sociology 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_political_science_and_sociology 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: electronics_engineering 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_electronics_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: refrigerating_machinery 2 | include: _direct_hard_kmmlu_yaml 3 | task: kmmlu_hard_direct_refrigerating_machinery 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: high-discount-factor 3 | include: _template_yaml 4 | task: persona_high-discount-factor 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/interest-in-science.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: interest-in-science 3 | include: _template_yaml 4 | task: persona_interest-in-science 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/low-discount-factor.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: low-discount-factor 3 | include: _template_yaml 4 | task: persona_low-discount-factor 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: no-power-discomfort 3 | include: _template_yaml 4 | task: persona_no-power-discomfort 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/politically-liberal.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: politically-liberal 3 | include: _template_yaml 4 | task: persona_politically-liberal 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/resource-acquisition.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: resource-acquisition 3 | include: _template_yaml 4 | task: persona_resource-acquisition 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/subscribes-to-Islam.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: subscribes-to-Islam 3 | include: _template_yaml 4 | task: persona_subscribes-to-Islam 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/subscribes-to-Taoism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: subscribes-to-Taoism 3 | include: _template_yaml 4 | task: persona_subscribes-to-Taoism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/toxigen/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def doc_to_target(doc): 5 | return np.round(((doc["toxicity_ai"] + doc["toxicity_human"]) > 5.5), 0).astype( 6 | np.int32 7 | ) 8 | -------------------------------------------------------------------------------- /lm_eval/tasks/aclue/aclue_ancient_medical.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "ancient_medical" 2 | "description": "以下是关于医古文的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aclue_ancient_medical" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_miscellaneous" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_philosophy" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_prehistory" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/anachronisms.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: anachronisms_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_anachronisms_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/fact_checker.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: fact_checker_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_fact_checker_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/few_shot_nlg.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: few_shot_nlg_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_few_shot_nlg_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/implicatures.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: implicatures_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_implicatures_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/logical_args.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: logical_args_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_logical_args_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/matrixshapes.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: matrixshapes_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_matrixshapes_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/riddle_sense.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: riddle_sense_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_riddle_sense_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/suicide_risk.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: suicide_risk_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_suicide_risk_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/topical_chat.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: topical_chat_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_topical_chat_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/word_sorting.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: word_sorting_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_word_sorting_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: emoji_movie_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_emoji_movie_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: kanji_ascii_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_kanji_ascii_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: mnist_ascii_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_mnist_ascii_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: odd_one_out_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_odd_one_out_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: parsinlu_qa_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_parsinlu_qa_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: qa_wikidata_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_qa_wikidata_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/existential_there_subject_raising.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: existential_there_subject_raising 3 | include: _template_yaml 4 | task: blimp_existential_there_subject_raising 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: wh_vs_that_with_gap_long_distance 3 | include: _template_yaml 4 | task: blimp_wh_vs_that_with_gap_long_distance 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ceval/ceval-valid_civil_servant.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "civil_servant" 2 | "description": "以下是中国关于公务员的单项选择题,请选出其中的正确答案。\n\n" 3 | "include": "_default_ceval_yaml" 4 | "task": "ceval-valid_civil_servant" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_food_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "food_science" 2 | "description": "以下是关于食品科学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_food_science" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "以下是关于全球事实的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_global_facts" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "以下是关于法理学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_jurisprudence" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/cmmlu/cmmlu_default_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_history" 2 | "description": "以下是关于世界历史的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "cmmlu_world_history" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_disability.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_disability 3 | dataset_name: english 4 | process_docs: !function utils.filter_disability 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_english_race_color.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_english_race_color 3 | dataset_name: english 4 | process_docs: !function utils.filter_race_color 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_disability.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_disability 3 | dataset_name: french 4 | process_docs: !function utils.filter_disability 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_nationality.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_nationality 3 | dataset_name: french 4 | process_docs: !function utils.filter_nationality 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_race_color.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_race_color 3 | dataset_name: french 4 | process_docs: !function utils.filter_race_color 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/crows_pairs/crows_pairs_french_socioeconomic.yaml: -------------------------------------------------------------------------------- 1 | include: crows_pairs_english.yaml 2 | task: crows_pairs_french_socioeconomic 3 | dataset_name: french 4 | process_docs: !function utils.filter_socio 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Interior-Architecture-and-Design 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_interior_architecture_and_design 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Machine-Design-and-Manufacturing 2 | include: _direct_kmmlu_yaml 3 | task: kmmlu_direct_machine_design_and_manufacturing 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: railway_and_automotive_engineering 2 | include: _hard_kmmlu_yaml 3 | task: kmmlu_hard_railway_and_automotive_engineering 4 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: cognitive-enhancement 3 | include: _template_yaml 4 | task: persona_cognitive-enhancement 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/desire-for-popularity.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: desire-for-popularity 3 | include: _template_yaml 4 | task: persona_desire-for-popularity 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/desire-for-wide-usage.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: desire-for-wide-usage 3 | include: _template_yaml 4 | task: persona_desire-for-wide-usage 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: subscribes-to-Atheism 3 | include: _template_yaml 4 | task: persona_subscribes-to-Atheism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/model_written_evals/persona/subscribes-to-Judaism.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: subscribes-to-Judaism 3 | include: _template_yaml 4 | task: persona_subscribes-to-Judaism 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aclue/aclue_ancient_phonetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "ancient_phonetics" 2 | "description": "以下是关于古音学的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aclue_ancient_phonetics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aclue/aclue_couplet_prediction.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "couplet_prediction" 2 | "description": "以下是关于对联的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aclue_couplet_prediction" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/aclue/aclue_poetry_appreciate.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "poetry_appreciate" 2 | "description": "以下是关于古诗词曲鉴赏的单项选择题,请直接给出正确答案的选项。\n\n" 3 | "include": "_default_template_yaml" 4 | "task": "aclue_poetry_appreciate" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_business_ethics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "فم بعملية التقييم في مجال العلوم الإجتماعية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_econometrics" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_formal_logic" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/ammlu/ammlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n" 3 | "include": "_default_template_yaml" 4 | "task": "ammlu_jurisprudence" 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/auto_debugging.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: auto_debugging_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_auto_debugging_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/bbq_lite_json.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: bbq_lite_json_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_bbq_lite_json_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/crash_blossom.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: crash_blossom_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_crash_blossom_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/cs_algorithms.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: cs_algorithms_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_cs_algorithms_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/dyck_languages.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: dyck_languages_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_dyck_languages_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/hhh_alignment.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: hhh_alignment_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_hhh_alignment_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/key_value_maps.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: key_value_maps_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_key_value_maps_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/known_unknowns.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: known_unknowns_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_known_unknowns_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/language_games.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: language_games_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_language_games_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/list_functions.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: list_functions_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_list_functions_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/misconceptions.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: misconceptions_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_misconceptions_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/novel_concepts.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: novel_concepts_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_novel_concepts_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/persian_idioms.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: persian_idioms_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_persian_idioms_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/generate_until/social_support.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: social_support_zero_shot 3 | include: ../generate_until_template_yaml 4 | task: bigbench_social_support_generate_until 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: anachronisms_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_anachronisms_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: bbq_lite_json_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_bbq_lite_json_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: crash_blossom_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_crash_blossom_multiple_choice 5 | -------------------------------------------------------------------------------- /lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml: -------------------------------------------------------------------------------- 1 | # Generated by utils.py 2 | dataset_name: cs_algorithms_zero_shot 3 | include: ../multiple_choice_template_yaml 4 | task: bigbench_cs_algorithms_multiple_choice 5 | --------------------------------------------------------------------------------