├── decompose
    └── __init__.py
├── lm_eval
    ├── api
    │   └── __init__.py
    ├── tasks
    │   ├── nq_open
    │   │   └── README.md
    │   ├── squadv2
    │   │   └── squadv2.yaml
    │   ├── fld
    │   │   └── fld_star.yaml
    │   ├── headqa
    │   │   └── headqa_es.yaml
    │   ├── pile
    │   │   ├── pile_enron.yaml
    │   │   ├── pile_uspto.yaml
    │   │   ├── pile_books3.yaml
    │   │   ├── pile_github.yaml
    │   │   ├── pile_europarl.yaml
    │   │   ├── pile_freelaw.yaml
    │   │   ├── pile_pile-cc.yaml
    │   │   ├── pile_gutenberg.yaml
    │   │   ├── pile_hackernews.yaml
    │   │   ├── pile_philpapers.yaml
    │   │   ├── pile_ubuntu-irc.yaml
    │   │   ├── pile_wikipedia.yaml
    │   │   ├── pile_bookcorpus2.yaml
    │   │   ├── pile_nih-exporter.yaml
    │   │   ├── pile_opensubtitles.yaml
    │   │   ├── pile_openwebtext2.yaml
    │   │   ├── pile_stackexchange.yaml
    │   │   ├── pile_dm-mathematics.yaml
    │   │   ├── pile_pubmed-central.yaml
    │   │   ├── pile_pubmed-abstracts.yaml
    │   │   └── pile_youtubesubtitles.yaml
    │   ├── mutual
    │   │   └── multual_plus.yaml
    │   ├── scrolls
    │   │   ├── scrolls_qasper.yaml
    │   │   ├── scrolls_qmsum.yaml
    │   │   ├── scrolls_quality.yaml
    │   │   ├── scrolls_govreport.yaml
    │   │   ├── scrolls_contractnli.yaml
    │   │   ├── scrolls_narrativeqa.yaml
    │   │   └── scrolls_summscreenfd.yaml
    │   ├── xstorycloze
    │   │   ├── default_en.yaml
    │   │   ├── default_es.yaml
    │   │   ├── default_eu.yaml
    │   │   ├── default_hi.yaml
    │   │   ├── default_id.yaml
    │   │   ├── default_my.yaml
    │   │   ├── default_ru.yaml
    │   │   ├── default_sw.yaml
    │   │   ├── default_te.yaml
    │   │   └── default_zh.yaml
    │   ├── arc
    │   │   └── arc_challenge.yaml
    │   ├── csatqa
    │   │   ├── csatqa_gr.yaml
    │   │   ├── csatqa_li.yaml
    │   │   ├── csatqa_wr.yaml
    │   │   ├── csatqa_rch.yaml
    │   │   ├── csatqa_rcs.yaml
    │   │   └── csatqa_rcss.yaml
    │   ├── kmmlu
    │   │   ├── hard
    │   │   │   ├── kmmlu_hard_law.yaml
    │   │   │   ├── kmmlu_hard_math.yaml
    │   │   │   ├── kmmlu_hard_biology.yaml
    │   │   │   ├── kmmlu_hard_ecology.yaml
    │   │   │   ├── kmmlu_hard_fashion.yaml
    │   │   │   ├── kmmlu_hard_health.yaml
    │   │   │   ├── kmmlu_hard_patent.yaml
    │   │   │   ├── kmmlu_hard_taxation.yaml
    │   │   │   ├── kmmlu_hard_accounting.yaml
    │   │   │   ├── kmmlu_hard_chemistry.yaml
    │   │   │   ├── kmmlu_hard_economics.yaml
    │   │   │   ├── kmmlu_hard_education.yaml
    │   │   │   ├── kmmlu_hard_geomatics.yaml
    │   │   │   ├── kmmlu_hard_management.yaml
    │   │   │   ├── kmmlu_hard_marketing.yaml
    │   │   │   ├── kmmlu_hard_psychology.yaml
    │   │   │   ├── kmmlu_hard_construction.yaml
    │   │   │   ├── kmmlu_hard_criminal_law.yaml
    │   │   │   ├── kmmlu_hard_real_estate.yaml
    │   │   │   ├── kmmlu_hard_public_safety.yaml
    │   │   │   ├── kmmlu_hard_food_processing.yaml
    │   │   │   ├── kmmlu_hard_korean_history.yaml
    │   │   │   ├── kmmlu_hard_social_welfare.yaml
    │   │   │   ├── kmmlu_hard_civil_engineering.yaml
    │   │   │   ├── kmmlu_hard_computer_science.yaml
    │   │   │   ├── kmmlu_hard_energy_management.yaml
    │   │   │   ├── kmmlu_hard_chemical_engineering.yaml
    │   │   │   ├── kmmlu_hard_industrial_engineer.yaml
    │   │   │   ├── kmmlu_hard_maritime_engineering.yaml
    │   │   │   ├── kmmlu_hard_agricultural_sciences.yaml
    │   │   │   ├── kmmlu_hard_electrical_engineering.yaml
    │   │   │   ├── kmmlu_hard_environmental_science.yaml
    │   │   │   ├── kmmlu_hard_information_technology.yaml
    │   │   │   ├── kmmlu_hard_materials_engineering.yaml
    │   │   │   ├── kmmlu_hard_mechanical_engineering.yaml
    │   │   │   ├── kmmlu_hard_nondestructive_testing.yaml
    │   │   │   ├── kmmlu_hard_electronics_engineering.yaml
    │   │   │   ├── kmmlu_hard_refrigerating_machinery.yaml
    │   │   │   ├── kmmlu_hard_gas_technology_and_engineering.yaml
    │   │   │   ├── kmmlu_hard_interior_architecture_and_design.yaml
    │   │   │   ├── kmmlu_hard_machine_design_and_manufacturing.yaml
    │   │   │   ├── kmmlu_hard_political_science_and_sociology.yaml
    │   │   │   └── kmmlu_hard_railway_and_automotive_engineering.yaml
    │   │   ├── direct
    │   │   │   ├── kmmlu_direct_law.yaml
    │   │   │   ├── kmmlu_direct_math.yaml
    │   │   │   ├── kmmlu_direct_health.yaml
    │   │   │   ├── kmmlu_direct_patent.yaml
    │   │   │   ├── kmmlu_direct_biology.yaml
    │   │   │   ├── kmmlu_direct_ecology.yaml
    │   │   │   ├── kmmlu_direct_fashion.yaml
    │   │   │   ├── kmmlu_direct_chemistry.yaml
    │   │   │   ├── kmmlu_direct_economics.yaml
    │   │   │   ├── kmmlu_direct_education.yaml
    │   │   │   ├── kmmlu_direct_geomatics.yaml
    │   │   │   ├── kmmlu_direct_marketing.yaml
    │   │   │   ├── kmmlu_direct_taxation.yaml
    │   │   │   ├── kmmlu_direct_accounting.yaml
    │   │   │   ├── kmmlu_direct_management.yaml
    │   │   │   ├── kmmlu_direct_psychology.yaml
    │   │   │   ├── kmmlu_direct_real_estate.yaml
    │   │   │   ├── kmmlu_direct_construction.yaml
    │   │   │   ├── kmmlu_direct_criminal_law.yaml
    │   │   │   ├── kmmlu_direct_korean_history.yaml
    │   │   │   ├── kmmlu_direct_public_safety.yaml
    │   │   │   ├── kmmlu_direct_social_welfare.yaml
    │   │   │   ├── kmmlu_direct_computer_science.yaml
    │   │   │   ├── kmmlu_direct_food_processing.yaml
    │   │   │   ├── kmmlu_direct_civil_engineering.yaml
    │   │   │   ├── kmmlu_direct_energy_management.yaml
    │   │   │   ├── kmmlu_direct_industrial_engineer.yaml
    │   │   │   ├── kmmlu_direct_agricultural_sciences.yaml
    │   │   │   ├── kmmlu_direct_chemical_engineering.yaml
    │   │   │   ├── kmmlu_direct_environmental_science.yaml
    │   │   │   ├── kmmlu_direct_maritime_engineering.yaml
    │   │   │   ├── kmmlu_direct_materials_engineering.yaml
    │   │   │   ├── kmmlu_direct_electrical_engineering.yaml
    │   │   │   ├── kmmlu_direct_information_technology.yaml
    │   │   │   ├── kmmlu_direct_mechanical_engineering.yaml
    │   │   │   ├── kmmlu_direct_nondestructive_testing.yaml
    │   │   │   ├── kmmlu_direct_electronics_engineering.yaml
    │   │   │   ├── kmmlu_direct_refrigerating_machinery.yaml
    │   │   │   ├── kmmlu_direct_gas_technology_and_engineering.yaml
    │   │   │   ├── kmmlu_direct_political_science_and_sociology.yaml
    │   │   │   ├── kmmlu_direct_interior_architecture_and_design.yaml
    │   │   │   └── kmmlu_direct_machine_design_and_manufacturing.yaml
    │   │   └── direct_hard
    │   │   │   ├── kmmlu_direct_hard_law.yaml
    │   │   │   ├── kmmlu_direct_hard_math.yaml
    │   │   │   ├── kmmlu_direct_hard_health.yaml
    │   │   │   ├── kmmlu_direct_hard_patent.yaml
    │   │   │   ├── kmmlu_direct_hard_biology.yaml
    │   │   │   ├── kmmlu_direct_hard_ecology.yaml
    │   │   │   ├── kmmlu_direct_hard_fashion.yaml
    │   │   │   ├── kmmlu_direct_hard_chemistry.yaml
    │   │   │   ├── kmmlu_direct_hard_economics.yaml
    │   │   │   ├── kmmlu_direct_hard_education.yaml
    │   │   │   ├── kmmlu_direct_hard_geomatics.yaml
    │   │   │   ├── kmmlu_direct_hard_marketing.yaml
    │   │   │   ├── kmmlu_direct_hard_taxation.yaml
    │   │   │   ├── kmmlu_direct_hard_accounting.yaml
    │   │   │   ├── kmmlu_direct_hard_management.yaml
    │   │   │   ├── kmmlu_direct_hard_psychology.yaml
    │   │   │   ├── kmmlu_direct_hard_real_estate.yaml
    │   │   │   ├── kmmlu_direct_hard_construction.yaml
    │   │   │   ├── kmmlu_direct_hard_criminal_law.yaml
    │   │   │   ├── kmmlu_direct_hard_korean_history.yaml
    │   │   │   ├── kmmlu_direct_hard_public_safety.yaml
    │   │   │   ├── kmmlu_direct_hard_social_welfare.yaml
    │   │   │   ├── kmmlu_direct_hard_computer_science.yaml
    │   │   │   ├── kmmlu_direct_hard_food_processing.yaml
    │   │   │   ├── kmmlu_direct_hard_civil_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_energy_management.yaml
    │   │   │   ├── kmmlu_direct_hard_industrial_engineer.yaml
    │   │   │   ├── kmmlu_direct_hard_agricultural_sciences.yaml
    │   │   │   ├── kmmlu_direct_hard_chemical_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_environmental_science.yaml
    │   │   │   ├── kmmlu_direct_hard_maritime_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_materials_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_electrical_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_information_technology.yaml
    │   │   │   ├── kmmlu_direct_hard_mechanical_engineering.yaml
    │   │   │   ├── kmmlu_direct_hard_nondestructive_testing.yaml
    │   │   │   ├── kmmlu_direct_hard_electronics_engineering.yaml
    │   │   │   └── kmmlu_direct_hard_refrigerating_machinery.yaml
    │   ├── glue
    │   │   └── mnli
    │   │   │   └── mismatch.yaml
    │   ├── haerae
    │   │   ├── haerae_hi.yaml
    │   │   ├── haerae_lw.yaml
    │   │   ├── haerae_rw.yaml
    │   │   ├── haerae_gk.yaml
    │   │   └── haerae_sn.yaml
    │   ├── french_bench
    │   │   └── _default_template_yaml
    │   ├── lambada_multilingual
    │   │   ├── lambada_mt_de.yaml
    │   │   ├── lambada_mt_es.yaml
    │   │   ├── lambada_mt_fr.yaml
    │   │   └── lambada_mt_it.yaml
    │   ├── xnli_eu
    │   │   ├── xnli_eu_mt.yaml
    │   │   └── xnli_eu_native.yaml
    │   ├── crows_pairs
    │   │   ├── crows_pairs_french.yaml
    │   │   ├── crows_pairs_english_age.yaml
    │   │   ├── crows_pairs_french_age.yaml
    │   │   ├── crows_pairs_english_autre.yaml
    │   │   ├── crows_pairs_french_autre.yaml
    │   │   ├── crows_pairs_french_gender.yaml
    │   │   ├── crows_pairs_english_gender.yaml
    │   │   ├── crows_pairs_english_religion.yaml
    │   │   ├── crows_pairs_french_religion.yaml
    │   │   ├── crows_pairs_english_disability.yaml
    │   │   ├── crows_pairs_english_race_color.yaml
    │   │   ├── crows_pairs_french_disability.yaml
    │   │   ├── crows_pairs_french_nationality.yaml
    │   │   ├── crows_pairs_french_race_color.yaml
    │   │   └── crows_pairs_french_socioeconomic.yaml
    │   ├── blimp
    │   │   ├── causative.yaml
    │   │   ├── inchoative.yaml
    │   │   ├── passive_1.yaml
    │   │   ├── passive_2.yaml
    │   │   ├── transitive.yaml
    │   │   ├── wh_island.yaml
    │   │   ├── intransitive.yaml
    │   │   ├── drop_argument.yaml
    │   │   ├── npi_present_1.yaml
    │   │   ├── npi_present_2.yaml
    │   │   ├── adjunct_island.yaml
    │   │   ├── only_npi_scope.yaml
    │   │   ├── complex_NP_island.yaml
    │   │   ├── ellipsis_n_bar_1.yaml
    │   │   ├── ellipsis_n_bar_2.yaml
    │   │   ├── wh_vs_that_no_gap.yaml
    │   │   ├── principle_A_case_1.yaml
    │   │   ├── principle_A_case_2.yaml
    │   │   ├── tough_vs_raising_1.yaml
    │   │   ├── tough_vs_raising_2.yaml
    │   │   ├── principle_A_domain_1.yaml
    │   │   ├── principle_A_domain_2.yaml
    │   │   ├── principle_A_domain_3.yaml
    │   │   ├── wh_vs_that_with_gap.yaml
    │   │   ├── animate_subject_trans.yaml
    │   │   ├── principle_A_c_command.yaml
    │   │   ├── animate_subject_passive.yaml
    │   │   ├── wh_questions_object_gap.yaml
    │   │   ├── anaphor_gender_agreement.yaml
    │   │   ├── anaphor_number_agreement.yaml
    │   │   ├── only_npi_licensor_present.yaml
    │   │   ├── sentential_subject_island.yaml
    │   │   ├── superlative_quantifiers_1.yaml
    │   │   ├── superlative_quantifiers_2.yaml
    │   │   ├── wh_questions_subject_gap.yaml
    │   │   ├── determiner_noun_agreement_1.yaml
    │   │   ├── determiner_noun_agreement_2.yaml
    │   │   ├── expletive_it_object_raising.yaml
    │   │   ├── principle_A_reconstruction.yaml
    │   │   ├── sentential_negation_npi_scope.yaml
    │   │   ├── existential_there_object_raising.yaml
    │   │   ├── existential_there_quantifiers_1.yaml
    │   │   ├── existential_there_quantifiers_2.yaml
    │   │   ├── irregular_past_participle_verbs.yaml
    │   │   ├── left_branch_island_echo_question.yaml
    │   │   ├── wh_vs_that_no_gap_long_distance.yaml
    │   │   ├── existential_there_subject_raising.yaml
    │   │   └── wh_vs_that_with_gap_long_distance.yaml
    │   ├── qa4mre
    │   │   ├── qa4mre_2012.yaml
    │   │   └── qa4mre_2013.yaml
    │   ├── xwinograd
    │   │   ├── xwinograd_en.yaml
    │   │   ├── xwinograd_fr.yaml
    │   │   ├── xwinograd_jp.yaml
    │   │   ├── xwinograd_pt.yaml
    │   │   ├── xwinograd_ru.yaml
    │   │   └── xwinograd_zh.yaml
    │   ├── minerva_math
    │   │   ├── minerva_math_geometry.yaml
    │   │   ├── minerva_math_precalc.yaml
    │   │   ├── minerva_math_prealgebra.yaml
    │   │   ├── minerva_math_num_theory.yaml
    │   │   ├── minerva_math_counting_and_prob.yaml
    │   │   └── minerva_math_intermediate_algebra.yaml
    │   ├── mmlu
    │   │   └── default
    │   │   │   └── _mmlu.yaml
    │   ├── xcopa
    │   │   ├── default_ht.yaml
    │   │   ├── default_id.yaml
    │   │   ├── default_it.yaml
    │   │   ├── default_qu.yaml
    │   │   ├── default_sw.yaml
    │   │   ├── default_ta.yaml
    │   │   ├── default_th.yaml
    │   │   ├── default_tr.yaml
    │   │   ├── default_vi.yaml
    │   │   └── default_zh.yaml
    │   ├── anli
    │   │   ├── anli_r2.yaml
    │   │   └── anli_r3.yaml
    │   ├── okapi
    │   │   └── mmlu_multilingual
    │   │   │   ├── m_mmlu_ar.yaml
    │   │   │   ├── m_mmlu_bn.yaml
    │   │   │   ├── m_mmlu_ca.yaml
    │   │   │   ├── m_mmlu_da.yaml
    │   │   │   ├── m_mmlu_de.yaml
    │   │   │   ├── m_mmlu_en.yaml
    │   │   │   ├── m_mmlu_es.yaml
    │   │   │   ├── m_mmlu_eu.yaml
    │   │   │   ├── m_mmlu_fr.yaml
    │   │   │   ├── m_mmlu_gu.yaml
    │   │   │   ├── m_mmlu_hi.yaml
    │   │   │   ├── m_mmlu_hr.yaml
    │   │   │   ├── m_mmlu_hu.yaml
    │   │   │   ├── m_mmlu_hy.yaml
    │   │   │   ├── m_mmlu_id.yaml
    │   │   │   ├── m_mmlu_is.yaml
    │   │   │   ├── m_mmlu_it.yaml
    │   │   │   ├── m_mmlu_kn.yaml
    │   │   │   ├── m_mmlu_ml.yaml
    │   │   │   ├── m_mmlu_mr.yaml
    │   │   │   ├── m_mmlu_nb.yaml
    │   │   │   ├── m_mmlu_ne.yaml
    │   │   │   ├── m_mmlu_nl.yaml
    │   │   │   ├── m_mmlu_pt.yaml
    │   │   │   ├── m_mmlu_ro.yaml
    │   │   │   ├── m_mmlu_ru.yaml
    │   │   │   ├── m_mmlu_sk.yaml
    │   │   │   ├── m_mmlu_sr.yaml
    │   │   │   ├── m_mmlu_sv.yaml
    │   │   │   ├── m_mmlu_ta.yaml
    │   │   │   ├── m_mmlu_te.yaml
    │   │   │   ├── m_mmlu_uk.yaml
    │   │   │   ├── m_mmlu_vi.yaml
    │   │   │   └── m_mmlu_zh.yaml
    │   ├── polemo2
    │   │   └── polemo2_out.yaml
    │   ├── eus_exams
    │   │   ├── eus_exams_es_ejauxiliar.yaml
    │   │   ├── eus_exams_es_ejtecnico.yaml
    │   │   ├── eus_exams_es_opebilbao.yaml
    │   │   ├── eus_exams_es_opeehuaux.yaml
    │   │   ├── eus_exams_eu_ejlaguntza.yaml
    │   │   ├── eus_exams_es_ejsubalterno.yaml
    │   │   ├── eus_exams_es_opeehuadmin.yaml
    │   │   ├── eus_exams_es_opeehubiblio.yaml
    │   │   ├── eus_exams_es_opeosakiaux.yaml
    │   │   ├── eus_exams_es_opeosakienf.yaml
    │   │   ├── eus_exams_es_osakidetza1c.yaml
    │   │   ├── eus_exams_es_osakidetza2c.yaml
    │   │   ├── eus_exams_es_osakidetza3c.yaml
    │   │   ├── eus_exams_es_osakidetza4c.yaml
    │   │   ├── eus_exams_es_osakidetza5c.yaml
    │   │   ├── eus_exams_es_osakidetza6c.yaml
    │   │   ├── eus_exams_es_osakidetza7c.yaml
    │   │   ├── eus_exams_es_osakidetza8c.yaml
    │   │   ├── eus_exams_es_osakidetza9c.yaml
    │   │   ├── eus_exams_eu_ejteknikari.yaml
    │   │   ├── eus_exams_eu_opebilbaoeu.yaml
    │   │   ├── eus_exams_eu_opeehuauxeu.yaml
    │   │   ├── eus_exams_eu_osakidetza1e.yaml
    │   │   ├── eus_exams_eu_osakidetza2e.yaml
    │   │   ├── eus_exams_eu_osakidetza3e.yaml
    │   │   ├── eus_exams_eu_osakidetza5e.yaml
    │   │   ├── eus_exams_eu_osakidetza6e.yaml
    │   │   ├── eus_exams_eu_osakidetza7e.yaml
    │   │   ├── eus_exams_es_opeehuderecho.yaml
    │   │   ├── eus_exams_es_opeehutecnico.yaml
    │   │   ├── eus_exams_es_opeosakiadmin.yaml
    │   │   ├── eus_exams_eu_ejlaguntzaile.yaml
    │   │   ├── eus_exams_eu_opeehuadmineu.yaml
    │   │   ├── eus_exams_eu_opeosakiauxeu.yaml
    │   │   ├── eus_exams_eu_opeosakienfeu.yaml
    │   │   ├── eus_exams_es_opeehutecnicob.yaml
    │   │   ├── eus_exams_es_opeosakiauxenf.yaml
    │   │   ├── eus_exams_es_opeosakicelador.yaml
    │   │   ├── eus_exams_es_opeosakitecnico.yaml
    │   │   ├── eus_exams_es_opeosakivarios.yaml
    │   │   ├── eus_exams_eu_ejadministrari.yaml
    │   │   ├── eus_exams_eu_opeehubiblioeu.yaml
    │   │   ├── eus_exams_eu_opeehuderechoeu.yaml
    │   │   ├── eus_exams_eu_opeehutecnicoeu.yaml
    │   │   ├── eus_exams_eu_opeosakiadmineu.yaml
    │   │   ├── eus_exams_es_ejadministrativo.yaml
    │   │   ├── eus_exams_es_opeehueconomicas.yaml
    │   │   ├── eus_exams_es_opeehusubalterno.yaml
    │   │   ├── eus_exams_es_opeosakijuridico.yaml
    │   │   ├── eus_exams_es_opeosakioperario.yaml
    │   │   ├── eus_exams_eu_opeehuteknikarib.yaml
    │   │   ├── eus_exams_eu_opegasteizkoudala.yaml
    │   │   ├── eus_exams_eu_opeosakiauxenfeu.yaml
    │   │   ├── eus_exams_eu_opeosakiceladoreu.yaml
    │   │   ├── eus_exams_eu_opeosakitecnicoeu.yaml
    │   │   ├── eus_exams_eu_opeosakivarioseu.yaml
    │   │   ├── eus_exams_eu_opeehueconomicaseu.yaml
    │   │   ├── eus_exams_eu_opeehusubalternoeu.yaml
    │   │   ├── eus_exams_eu_opeosakioperarioeu.yaml
    │   │   ├── eus_exams_es_opeehuempresariales.yaml
    │   │   ├── eus_exams_es_opeayuntamientovitoria.yaml
    │   │   └── eus_exams_eu_opeehuempresarialeseu.yaml
    │   ├── gpqa
    │   │   ├── n_shot
    │   │   │   ├── gpqa_main_n_shot.yaml
    │   │   │   ├── gpqa_diamond_n_shot.yaml
    │   │   │   └── gpqa_extended_n_shot.yaml
    │   │   ├── zeroshot
    │   │   │   ├── gpqa_main_zeroshot.yaml
    │   │   │   ├── gpqa_diamond_zeroshot.yaml
    │   │   │   └── gpqa_extended_zeroshot.yaml
    │   │   ├── cot_n_shot
    │   │   │   ├── gpqa_main_cot_n_shot.yaml
    │   │   │   ├── gpqa_diamond_cot_n_shot.yaml
    │   │   │   └── gpqa_extended_cot_n_shot.yaml
    │   │   ├── cot_zeroshot
    │   │   │   ├── gpqa_main_cot_zeroshot.yaml
    │   │   │   ├── gpqa_diamond_cot_zeroshot.yaml
    │   │   │   └── gpqa_extended_cot_zeroshot.yaml
    │   │   └── generative
    │   │   │   └── gpqa_main_generative_n_shot.yaml
    │   ├── belebele
    │   │   ├── belebele_acm_Arab.yaml
    │   │   ├── belebele_afr_Latn.yaml
    │   │   ├── belebele_als_Latn.yaml
    │   │   ├── belebele_amh_Ethi.yaml
    │   │   ├── belebele_apc_Arab.yaml
    │   │   ├── belebele_arb_Arab.yaml
    │   │   ├── belebele_arb_Latn.yaml
    │   │   ├── belebele_ars_Arab.yaml
    │   │   ├── belebele_ary_Arab.yaml
    │   │   ├── belebele_arz_Arab.yaml
    │   │   ├── belebele_asm_Beng.yaml
    │   │   ├── belebele_azj_Latn.yaml
    │   │   ├── belebele_bam_Latn.yaml
    │   │   ├── belebele_ben_Beng.yaml
    │   │   ├── belebele_ben_Latn.yaml
    │   │   ├── belebele_bod_Tibt.yaml
    │   │   ├── belebele_bul_Cyrl.yaml
    │   │   ├── belebele_cat_Latn.yaml
    │   │   ├── belebele_ceb_Latn.yaml
    │   │   ├── belebele_ces_Latn.yaml
    │   │   ├── belebele_ckb_Arab.yaml
    │   │   ├── belebele_dan_Latn.yaml
    │   │   ├── belebele_deu_Latn.yaml
    │   │   ├── belebele_ell_Grek.yaml
    │   │   ├── belebele_eng_Latn.yaml
    │   │   ├── belebele_est_Latn.yaml
    │   │   ├── belebele_eus_Latn.yaml
    │   │   ├── belebele_fin_Latn.yaml
    │   │   ├── belebele_fra_Latn.yaml
    │   │   ├── belebele_fuv_Latn.yaml
    │   │   ├── belebele_gaz_Latn.yaml
    │   │   ├── belebele_grn_Latn.yaml
    │   │   ├── belebele_guj_Gujr.yaml
    │   │   ├── belebele_hat_Latn.yaml
    │   │   ├── belebele_hau_Latn.yaml
    │   │   ├── belebele_heb_Hebr.yaml
    │   │   ├── belebele_hin_Deva.yaml
    │   │   ├── belebele_hin_Latn.yaml
    │   │   ├── belebele_hrv_Latn.yaml
    │   │   ├── belebele_hun_Latn.yaml
    │   │   ├── belebele_hye_Armn.yaml
    │   │   ├── belebele_ibo_Latn.yaml
    │   │   ├── belebele_ilo_Latn.yaml
    │   │   ├── belebele_ind_Latn.yaml
    │   │   ├── belebele_isl_Latn.yaml
    │   │   ├── belebele_ita_Latn.yaml
    │   │   ├── belebele_jav_Latn.yaml
    │   │   ├── belebele_jpn_Jpan.yaml
    │   │   ├── belebele_kac_Latn.yaml
    │   │   ├── belebele_kan_Knda.yaml
    │   │   ├── belebele_kat_Geor.yaml
    │   │   ├── belebele_kaz_Cyrl.yaml
    │   │   ├── belebele_kea_Latn.yaml
    │   │   ├── belebele_khk_Cyrl.yaml
    │   │   ├── belebele_khm_Khmr.yaml
    │   │   ├── belebele_kin_Latn.yaml
    │   │   ├── belebele_kir_Cyrl.yaml
    │   │   ├── belebele_kor_Hang.yaml
    │   │   ├── belebele_lao_Laoo.yaml
    │   │   ├── belebele_lin_Latn.yaml
    │   │   ├── belebele_lit_Latn.yaml
    │   │   ├── belebele_lug_Latn.yaml
    │   │   ├── belebele_luo_Latn.yaml
    │   │   ├── belebele_lvs_Latn.yaml
    │   │   ├── belebele_mal_Mlym.yaml
    │   │   ├── belebele_mar_Deva.yaml
    │   │   ├── belebele_mkd_Cyrl.yaml
    │   │   ├── belebele_mlt_Latn.yaml
    │   │   ├── belebele_mri_Latn.yaml
    │   │   ├── belebele_mya_Mymr.yaml
    │   │   ├── belebele_nld_Latn.yaml
    │   │   ├── belebele_nob_Latn.yaml
    │   │   ├── belebele_npi_Deva.yaml
    │   │   ├── belebele_npi_Latn.yaml
    │   │   ├── belebele_nso_Latn.yaml
    │   │   ├── belebele_nya_Latn.yaml
    │   │   ├── belebele_ory_Orya.yaml
    │   │   ├── belebele_pan_Guru.yaml
    │   │   ├── belebele_pbt_Arab.yaml
    │   │   ├── belebele_pes_Arab.yaml
    │   │   ├── belebele_plt_Latn.yaml
    │   │   ├── belebele_pol_Latn.yaml
    │   │   ├── belebele_por_Latn.yaml
    │   │   ├── belebele_ron_Latn.yaml
    │   │   ├── belebele_rus_Cyrl.yaml
    │   │   ├── belebele_shn_Mymr.yaml
    │   │   ├── belebele_sin_Latn.yaml
    │   │   ├── belebele_sin_Sinh.yaml
    │   │   ├── belebele_slk_Latn.yaml
    │   │   ├── belebele_slv_Latn.yaml
    │   │   ├── belebele_sna_Latn.yaml
    │   │   ├── belebele_snd_Arab.yaml
    │   │   ├── belebele_som_Latn.yaml
    │   │   ├── belebele_sot_Latn.yaml
    │   │   ├── belebele_spa_Latn.yaml
    │   │   ├── belebele_srp_Cyrl.yaml
    │   │   ├── belebele_ssw_Latn.yaml
    │   │   ├── belebele_sun_Latn.yaml
    │   │   ├── belebele_swe_Latn.yaml
    │   │   ├── belebele_swh_Latn.yaml
    │   │   ├── belebele_tam_Taml.yaml
    │   │   ├── belebele_tel_Telu.yaml
    │   │   ├── belebele_tgk_Cyrl.yaml
    │   │   ├── belebele_tgl_Latn.yaml
    │   │   ├── belebele_tha_Thai.yaml
    │   │   ├── belebele_tir_Ethi.yaml
    │   │   ├── belebele_tsn_Latn.yaml
    │   │   ├── belebele_tso_Latn.yaml
    │   │   ├── belebele_tur_Latn.yaml
    │   │   ├── belebele_ukr_Cyrl.yaml
    │   │   ├── belebele_urd_Arab.yaml
    │   │   ├── belebele_urd_Latn.yaml
    │   │   ├── belebele_uzn_Latn.yaml
    │   │   ├── belebele_vie_Latn.yaml
    │   │   ├── belebele_war_Latn.yaml
    │   │   ├── belebele_wol_Latn.yaml
    │   │   ├── belebele_xho_Latn.yaml
    │   │   ├── belebele_yor_Latn.yaml
    │   │   ├── belebele_zho_Hans.yaml
    │   │   ├── belebele_zho_Hant.yaml
    │   │   ├── belebele_zsm_Latn.yaml
    │   │   └── belebele_zul_Latn.yaml
    │   ├── model_written_evals
    │   │   └── persona
    │   │   │   ├── openness.yaml
    │   │   │   ├── narcissism.yaml
    │   │   │   ├── neuroticism.yaml
    │   │   │   ├── psychopathy.yaml
    │   │   │   ├── risk-averse.yaml
    │   │   │   ├── agreeableness.yaml
    │   │   │   ├── extraversion.yaml
    │   │   │   ├── no-shut-down.yaml
    │   │   │   ├── risk-neutral.yaml
    │   │   │   ├── risk-seeking.yaml
    │   │   │   ├── has-disability.yaml
    │   │   │   ├── interest-in-art.yaml
    │   │   │   ├── no-goal-change.yaml
    │   │   │   ├── anti-immigration.yaml
    │   │   │   ├── interest-in-math.yaml
    │   │   │   ├── machiavellianism.yaml
    │   │   │   ├── self-replication.yaml
    │   │   │   ├── anti-LGBTQ-rights.yaml
    │   │   │   ├── conscientiousness.yaml
    │   │   │   ├── ends-justify-means.yaml
    │   │   │   ├── high-discount-rate.yaml
    │   │   │   ├── interest-in-music.yaml
    │   │   │   ├── interest-in-sports.yaml
    │   │   │   ├── low-discount-rate.yaml
    │   │   │   ├── stands-its-ground.yaml
    │   │   │   ├── high-discount-factor.yaml
    │   │   │   ├── interest-in-science.yaml
    │   │   │   ├── low-discount-factor.yaml
    │   │   │   ├── no-power-discomfort.yaml
    │   │   │   ├── politically-liberal.yaml
    │   │   │   ├── resource-acquisition.yaml
    │   │   │   ├── subscribes-to-Islam.yaml
    │   │   │   ├── subscribes-to-Taoism.yaml
    │   │   │   ├── cognitive-enhancement.yaml
    │   │   │   ├── desire-for-popularity.yaml
    │   │   │   ├── desire-for-wide-usage.yaml
    │   │   │   ├── subscribes-to-Atheism.yaml
    │   │   │   └── subscribes-to-Judaism.yaml
    │   ├── agieval
    │   │   ├── jec-qa-ca.yaml
    │   │   ├── jec-qa-kd.yaml
    │   │   ├── logiqa-zh.yaml
    │   │   ├── gaokao-mathqa.yaml
    │   │   ├── sat-en.yaml
    │   │   ├── gaokao-biology.yaml
    │   │   ├── gaokao-chinese.yaml
    │   │   ├── gaokao-history.yaml
    │   │   ├── gaokao-physics.yaml
    │   │   ├── lsat-ar.yaml
    │   │   ├── lsat-lr.yaml
    │   │   ├── lsat-rc.yaml
    │   │   ├── sat-math.yaml
    │   │   ├── gaokao-chemistry.yaml
    │   │   ├── gaokao-geography.yaml
    │   │   └── logiqa-en.yaml
    │   ├── tmmluplus
    │   │   └── default
    │   │   │   └── tmmluplus.yaml
    │   ├── arithmetic
    │   │   ├── arithmetic_2da.yaml
    │   │   ├── arithmetic_2dm.yaml
    │   │   ├── arithmetic_2ds.yaml
    │   │   ├── arithmetic_3da.yaml
    │   │   ├── arithmetic_3ds.yaml
    │   │   ├── arithmetic_4da.yaml
    │   │   ├── arithmetic_4ds.yaml
    │   │   ├── arithmetic_5da.yaml
    │   │   └── arithmetic_5ds.yaml
    │   ├── ceval
    │   │   ├── ceval-valid_law.yaml
    │   │   ├── ceval-valid_logic.yaml
    │   │   ├── ceval-valid_marxism.yaml
    │   │   ├── ceval-valid_physician.yaml
    │   │   ├── ceval-valid_accountant.yaml
    │   │   ├── ceval-valid_art_studies.yaml
    │   │   └── ceval-valid_civil_servant.yaml
    │   ├── bigbench
    │   │   ├── generate_until
    │   │   │   ├── gem.yaml
    │   │   │   ├── color.yaml
    │   │   │   ├── snarks.yaml
    │   │   │   ├── tense.yaml
    │   │   │   ├── kannada.yaml
    │   │   │   ├── physics.yaml
    │   │   │   ├── winowhy.yaml
    │   │   │   ├── codenames.yaml
    │   │   │   ├── crass_ai.yaml
    │   │   │   ├── disfl_qa.yaml
    │   │   │   ├── multiemo.yaml
    │   │   │   ├── navigate.yaml
    │   │   │   ├── operators.yaml
    │   │   │   ├── rephrase.yaml
    │   │   │   ├── timedial.yaml
    │   │   │   ├── arithmetic.yaml
    │   │   │   ├── cryptonite.yaml
    │   │   │   ├── emoji_movie.yaml
    │   │   │   ├── hyperbaton.yaml
    │   │   │   ├── kanji_ascii.yaml
    │   │   │   ├── mnist_ascii.yaml
    │   │   │   ├── odd_one_out.yaml
    │   │   │   ├── parsinlu_qa.yaml
    │   │   │   ├── qa_wikidata.yaml
    │   │   │   ├── ruin_names.yaml
    │   │   │   ├── social_iqa.yaml
    │   │   │   ├── strategyqa.yaml
    │   │   │   ├── anachronisms.yaml
    │   │   │   ├── fact_checker.yaml
    │   │   │   ├── few_shot_nlg.yaml
    │   │   │   ├── implicatures.yaml
    │   │   │   ├── logical_args.yaml
    │   │   │   ├── matrixshapes.yaml
    │   │   │   ├── riddle_sense.yaml
    │   │   │   ├── suicide_risk.yaml
    │   │   │   ├── topical_chat.yaml
    │   │   │   ├── word_sorting.yaml
    │   │   │   ├── auto_debugging.yaml
    │   │   │   ├── bbq_lite_json.yaml
    │   │   │   ├── crash_blossom.yaml
    │   │   │   ├── cs_algorithms.yaml
    │   │   │   ├── dyck_languages.yaml
    │   │   │   ├── hhh_alignment.yaml
    │   │   │   ├── key_value_maps.yaml
    │   │   │   ├── known_unknowns.yaml
    │   │   │   ├── language_games.yaml
    │   │   │   ├── list_functions.yaml
    │   │   │   ├── misconceptions.yaml
    │   │   │   ├── novel_concepts.yaml
    │   │   │   ├── persian_idioms.yaml
    │   │   │   └── social_support.yaml
    │   │   └── multiple_choice
    │   │   │   ├── gem.yaml
    │   │   │   ├── color.yaml
    │   │   │   ├── tense.yaml
    │   │   │   ├── snarks.yaml
    │   │   │   ├── crass_ai.yaml
    │   │   │   ├── disfl_qa.yaml
    │   │   │   ├── kannada.yaml
    │   │   │   ├── multiemo.yaml
    │   │   │   ├── navigate.yaml
    │   │   │   ├── physics.yaml
    │   │   │   ├── rephrase.yaml
    │   │   │   ├── timedial.yaml
    │   │   │   ├── winowhy.yaml
    │   │   │   ├── arithmetic.yaml
    │   │   │   ├── codenames.yaml
    │   │   │   ├── cryptonite.yaml
    │   │   │   ├── hyperbaton.yaml
    │   │   │   ├── operators.yaml
    │   │   │   ├── ruin_names.yaml
    │   │   │   ├── social_iqa.yaml
    │   │   │   ├── strategyqa.yaml
    │   │   │   ├── emoji_movie.yaml
    │   │   │   ├── kanji_ascii.yaml
    │   │   │   ├── mnist_ascii.yaml
    │   │   │   ├── odd_one_out.yaml
    │   │   │   ├── parsinlu_qa.yaml
    │   │   │   ├── qa_wikidata.yaml
    │   │   │   ├── anachronisms.yaml
    │   │   │   ├── bbq_lite_json.yaml
    │   │   │   ├── crash_blossom.yaml
    │   │   │   └── cs_algorithms.yaml
    │   ├── cmmlu
    │   │   ├── cmmlu_default_arts.yaml
    │   │   ├── cmmlu_default_anatomy.yaml
    │   │   ├── cmmlu_default_logical.yaml
    │   │   ├── cmmlu_default_agronomy.yaml
    │   │   ├── cmmlu_default_genetics.yaml
    │   │   ├── cmmlu_default_virology.yaml
    │   │   ├── cmmlu_default_astronomy.yaml
    │   │   ├── cmmlu_default_economics.yaml
    │   │   ├── cmmlu_default_education.yaml
    │   │   ├── cmmlu_default_ethnology.yaml
    │   │   ├── cmmlu_default_journalism.yaml
    │   │   ├── cmmlu_default_management.yaml
    │   │   ├── cmmlu_default_marketing.yaml
    │   │   ├── cmmlu_default_nutrition.yaml
    │   │   ├── cmmlu_default_philosophy.yaml
    │   │   ├── cmmlu_default_sociology.yaml
    │   │   ├── cmmlu_default_college_law.yaml
    │   │   ├── cmmlu_default_food_science.yaml
    │   │   ├── cmmlu_default_global_facts.yaml
    │   │   ├── cmmlu_default_jurisprudence.yaml
    │   │   └── cmmlu_default_world_history.yaml
    │   ├── aexams
    │   │   ├── aexams_Science.yaml
    │   │   ├── aexams_Physics.yaml
    │   │   ├── aexams_Biology.yaml
    │   │   └── aexams_Social.yaml
    │   ├── ammlu
    │   │   ├── ammlu_virology.yaml
    │   │   ├── ammlu_management.yaml
    │   │   ├── ammlu_marketing.yaml
    │   │   ├── ammlu_nutrition.yaml
    │   │   ├── ammlu_global_facts.yaml
    │   │   ├── ammlu_human_aging.yaml
    │   │   ├── ammlu_sociology.yaml
    │   │   ├── ammlu_miscellaneous.yaml
    │   │   ├── ammlu_philosophy.yaml
    │   │   ├── ammlu_prehistory.yaml
    │   │   ├── ammlu_business_ethics.yaml
    │   │   ├── ammlu_econometrics.yaml
    │   │   ├── ammlu_formal_logic.yaml
    │   │   └── ammlu_jurisprudence.yaml
    │   ├── toxigen
    │   │   └── utils.py
    │   └── aclue
    │   │   ├── aclue_ancient_medical.yaml
    │   │   ├── aclue_ancient_phonetics.yaml
    │   │   ├── aclue_couplet_prediction.yaml
    │   │   └── aclue_poetry_appreciate.yaml
    ├── decontamination
    │   └── __init__.py
    └── __init__.py
└── README.md


/decompose/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lm_eval/api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/nq_open/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lm_eval/decontamination/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lm_eval/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluator import evaluate, simple_evaluate
2 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/squadv2/squadv2.yaml:
--------------------------------------------------------------------------------
1 | task: squadv2
2 | class: !function task.SQuAD2
3 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/fld/fld_star.yaml:
--------------------------------------------------------------------------------
1 | include: fld_default.yaml
2 | task: fld_star
3 | dataset_name: star
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/headqa/headqa_es.yaml:
--------------------------------------------------------------------------------
1 | include: headqa_en.yaml
2 | task: headqa_es
3 | dataset_name: es
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_enron.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_enron
3 | dataset_name: pile_enron
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_uspto.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_uspto
3 | dataset_name: pile_uspto
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/mutual/multual_plus.yaml:
--------------------------------------------------------------------------------
1 | include: mutual.yaml
2 | task: mutual_plus
3 | dataset_name: mutual_plus
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_books3.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_books3
3 | dataset_name: pile_books3
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_github.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_github
3 | dataset_name: pile_github
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_qasper.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_qasper
3 | class: !function task.Qasper
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_qmsum.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_qmsum
3 | class: !function task.QMSum
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_en.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_en
3 | dataset_name: en
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_es.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_es
3 | dataset_name: es
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_eu.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_eu
3 | dataset_name: eu
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_hi.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_hi
3 | dataset_name: hi
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_id.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_id
3 | dataset_name: id
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_my.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_my
3 | dataset_name: my
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_ru.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_ru
3 | dataset_name: ru
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_sw.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_sw
3 | dataset_name: sw
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_te.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_te
3 | dataset_name: te
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xstorycloze/default_zh.yaml:
--------------------------------------------------------------------------------
1 | include: default_ar.yaml
2 | task: xstorycloze_zh
3 | dataset_name: zh
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arc/arc_challenge.yaml:
--------------------------------------------------------------------------------
1 | include: arc_easy.yaml
2 | task: arc_challenge
3 | dataset_name: ARC-Challenge
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_gr.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "GR"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_gr"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_li.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "LI"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_li"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_wr.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "WR"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_wr"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: law
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_europarl.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_europarl
3 | dataset_name: pile_europarl
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_freelaw.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_freelaw
3 | dataset_name: pile_freelaw
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_pile-cc.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_pile-cc
3 | dataset_name: pile_pile-cc
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_quality.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_quality
3 | class: !function task.QuALITY
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_rch.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "RCH"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_rch"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_rcs.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "RCS"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_rcs"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/csatqa/csatqa_rcss.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "RCSS"
2 | "include": "_default_csatqa_yaml"
3 | "task": "csatqa_rcss"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: math
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_math
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_gutenberg.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_gutenberg
3 | dataset_name: pile_gutenberg
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_hackernews.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_hackernews
3 | dataset_name: pile_hackernews
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_philpapers.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_philpapers
3 | dataset_name: pile_philpapers
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_ubuntu-irc.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_ubuntu-irc
3 | dataset_name: pile_ubuntu-irc
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_wikipedia.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_wikipedia
3 | dataset_name: pile_wikipedia
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_govreport.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_govreport
3 | class: !function task.GovReport
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/glue/mnli/mismatch.yaml:
--------------------------------------------------------------------------------
1 | include: default.yaml
2 | task: mnli_mismatch
3 | validation_split: validation_mismatched
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/haerae/haerae_hi.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "history"
2 | "include": "_default_haerae_yaml"
3 | "task": "haerae_history"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Law
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_math.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Math
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_math
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_biology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: biology
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_biology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_ecology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: ecology
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_ecology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_fashion.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: fashion
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_fashion
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: health
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_health
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: patent
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_patent
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_bookcorpus2.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_bookcorpus2
3 | dataset_name: pile_bookcorpus2
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_contractnli.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_contractnli
3 | class: !function task.ContractNLI
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_narrativeqa.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_narrativeqa
3 | class: !function task.NarrativeQA
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/french_bench/_default_template_yaml:
--------------------------------------------------------------------------------
1 | test_split: test
2 | fewshot_split: valid
3 | fewshot_config:
4 |   sampler: first_n
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/haerae/haerae_lw.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "loan_words"
2 | "include": "_default_haerae_yaml"
3 | "task": "haerae_loan_word"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/haerae/haerae_rw.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "rare_words"
2 | "include": "_default_haerae_yaml"
3 | "task": "haerae_rare_word"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_health.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Health
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_health
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_patent.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Patent
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_patent
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_taxation.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: taxation
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_taxation
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/lambada_multilingual/lambada_mt_de.yaml:
--------------------------------------------------------------------------------
1 | include: lambada_mt_en.yaml
2 | task: lambada_openai_mt_de
3 | dataset_name: de
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/lambada_multilingual/lambada_mt_es.yaml:
--------------------------------------------------------------------------------
1 | include: lambada_mt_en.yaml
2 | task: lambada_openai_mt_es
3 | dataset_name: es
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/lambada_multilingual/lambada_mt_fr.yaml:
--------------------------------------------------------------------------------
1 | include: lambada_mt_en.yaml
2 | task: lambada_openai_mt_fr
3 | dataset_name: fr
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/lambada_multilingual/lambada_mt_it.yaml:
--------------------------------------------------------------------------------
1 | include: lambada_mt_en.yaml
2 | task: lambada_openai_mt_it
3 | dataset_name: it
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_nih-exporter.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_nih-exporter
3 | dataset_name: pile_nih-exporter
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_opensubtitles.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_opensubtitles
3 | dataset_name: pile_opensubtitles
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_openwebtext2.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_openwebtext2
3 | dataset_name: pile_openwebtext2
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_stackexchange.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_stackexchange
3 | dataset_name: pile_stackexchange
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/scrolls/scrolls_summscreenfd.yaml:
--------------------------------------------------------------------------------
1 | group: scrolls
2 | task: scrolls_summscreenfd
3 | class: !function task.SummScreenFD
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xnli_eu/xnli_eu_mt.yaml:
--------------------------------------------------------------------------------
1 | include: xnli_eu.yaml
2 | group: xnli_eu_mt_native
3 | task: xnli_eu_mt
4 | dataset_name: eu_mt
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french
3 | dataset_name: french
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_biology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Biology
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_biology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_ecology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Ecology
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_ecology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_fashion.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Fashion
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_fashion
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_accounting.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: accounting
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_accounting
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemistry.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: chemistry
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_chemistry
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_economics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: economics
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_economics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_education.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: education
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_education
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: geomatics
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_geomatics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: management
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: marketing
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_marketing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: psychology
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_psychology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_dm-mathematics.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_dm-mathematics
3 | dataset_name: pile_dm-mathematics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_pubmed-central.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_pubmed-central
3 | dataset_name: pile_pubmed-central
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/causative.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: causative
3 | include: _template_yaml
4 | task: blimp_causative
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/inchoative.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: inchoative
3 | include: _template_yaml
4 | task: blimp_inchoative
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/passive_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: passive_1
3 | include: _template_yaml
4 | task: blimp_passive_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/passive_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: passive_2
3 | include: _template_yaml
4 | task: blimp_passive_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/transitive.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: transitive
3 | include: _template_yaml
4 | task: blimp_transitive
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_island.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_island
3 | include: _template_yaml
4 | task: blimp_wh_island
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemistry.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Chemistry
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_chemistry
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_economics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Economics
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_economics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_education.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Education
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_education
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_geomatics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Geomatics
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_geomatics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_marketing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Marketing
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_marketing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_taxation.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Taxation
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_taxation
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_construction.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: construction
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_construction
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_criminal_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: criminal_law
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_criminal_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_real_estate.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: real_estate
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_real_estate
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_pubmed-abstracts.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_pubmed-abstracts
3 | dataset_name: pile_pubmed-abstracts
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/pile/pile_youtubesubtitles.yaml:
--------------------------------------------------------------------------------
1 | include: pile_arxiv.yaml
2 | task: pile_youtubesubtitles
3 | dataset_name: pile_youtubesubtitles
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/qa4mre/qa4mre_2012.yaml:
--------------------------------------------------------------------------------
1 | include: qa4mre_2011.yaml
2 | task: qa4mre_2012
3 | dataset_path: qa4mre
4 | dataset_name: 2012.main.EN
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/qa4mre/qa4mre_2013.yaml:
--------------------------------------------------------------------------------
1 | include: qa4mre_2011.yaml
2 | task: qa4mre_2013
3 | dataset_path: qa4mre
4 | dataset_name: 2013.main.EN
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_en.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: en
3 | include: xwinograd_common_yaml
4 | task: xwinograd_en
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_fr.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: fr
3 | include: xwinograd_common_yaml
4 | task: xwinograd_fr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_jp.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: jp
3 | include: xwinograd_common_yaml
4 | task: xwinograd_jp
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_pt.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: pt
3 | include: xwinograd_common_yaml
4 | task: xwinograd_pt
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_ru.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: ru
3 | include: xwinograd_common_yaml
4 | task: xwinograd_ru
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xwinograd/xwinograd_zh.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: zh
3 | include: xwinograd_common_yaml
4 | task: xwinograd_zh
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/intransitive.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: intransitive
3 | include: _template_yaml
4 | task: blimp_intransitive
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/haerae/haerae_gk.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "general_knowledge"
2 | "include": "_default_haerae_yaml"
3 | "task": "haerae_general_knowledge"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_accounting.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Accounting
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_accounting
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Management
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_psychology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Psychology
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_psychology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_real_estate.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Real-Estate
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_real_estate
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: law
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_math.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: math
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_math
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_public_safety.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: public_safety
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_public_safety
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_geometry.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: geometry
3 | task: minerva_math_geometry
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_precalc.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: precalculus
3 | task: minerva_math_precalc
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/mmlu/default/_mmlu.yaml:
--------------------------------------------------------------------------------
1 | group: mmlu
2 | task:
3 |   - mmlu_stem
4 |   - mmlu_other
5 |   - mmlu_social_sciences
6 |   - mmlu_humanities
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_ht.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_ht
3 | dataset_name: ht
4 | doc_to_text: !function utils.doc_to_text_ht
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_id.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_id
3 | dataset_name: id
4 | doc_to_text: !function utils.doc_to_text_id
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_it.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_it
3 | dataset_name: it
4 | doc_to_text: !function utils.doc_to_text_it
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_qu.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_qu
3 | dataset_name: qu
4 | doc_to_text: !function utils.doc_to_text_qu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_sw.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_sw
3 | dataset_name: sw
4 | doc_to_text: !function utils.doc_to_text_sw
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_ta.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_ta
3 | dataset_name: ta
4 | doc_to_text: !function utils.doc_to_text_ta
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_th.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_th
3 | dataset_name: th
4 | doc_to_text: !function utils.doc_to_text_th
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_tr.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_tr
3 | dataset_name: tr
4 | doc_to_text: !function utils.doc_to_text_tr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_vi.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_vi
3 | dataset_name: vi
4 | doc_to_text: !function utils.doc_to_text_vi
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xcopa/default_zh.yaml:
--------------------------------------------------------------------------------
1 | include: default_et.yaml
2 | task: xcopa_zh
3 | dataset_name: zh
4 | doc_to_text: !function utils.doc_to_text_zh
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/anli/anli_r2.yaml:
--------------------------------------------------------------------------------
1 | include: anli_r1.yaml
2 | task: anli_r2
3 | training_split: train_r2
4 | validation_split: dev_r2
5 | test_split: test_r2
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/anli/anli_r3.yaml:
--------------------------------------------------------------------------------
1 | include: anli_r1.yaml
2 | task: anli_r3
3 | training_split: train_r3
4 | validation_split: dev_r3
5 | test_split: test_r3
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/drop_argument.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: drop_argument
3 | include: _template_yaml
4 | task: blimp_drop_argument
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/npi_present_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: npi_present_1
3 | include: _template_yaml
4 | task: blimp_npi_present_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/npi_present_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: npi_present_2
3 | include: _template_yaml
4 | task: blimp_npi_present_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_construction.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Construction
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_construction
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_criminal_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Criminal-Law
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_criminal_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_health.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: health
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_health
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_patent.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: patent
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_patent
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: food_processing
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_food_processing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: korean_history
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_korean_history
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_social_welfare.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: social_welfare
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_social_welfare
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: prealgebra
3 | task: minerva_math_prealgebra
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/adjunct_island.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: adjunct_island
3 | include: _template_yaml
4 | task: blimp_adjunct_island
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/only_npi_scope.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: only_npi_scope
3 | include: _template_yaml
4 | task: blimp_only_npi_scope
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/haerae/haerae_sn.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "standard_nomenclature"
2 | "include": "_default_haerae_yaml"
3 | "task": "haerae_standard_nomenclature"
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_korean_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Korean-History
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_korean_history
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_public_safety.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Public-Safety
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_public_safety
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_social_welfare.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Social-Welfare
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_social_welfare
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_biology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: biology
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_biology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_ecology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: ecology
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_ecology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_fashion.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: fashion
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_fashion
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_civil_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: civil_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_civil_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_computer_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: computer_science
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_computer_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_energy_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: energy_management
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_energy_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: number_theory
3 | task: minerva_math_num_theory
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ar.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ar
3 | include: _default_yaml
4 | task: m_mmlu_ar
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_bn.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: bn
3 | include: _default_yaml
4 | task: m_mmlu_bn
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ca.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ca
3 | include: _default_yaml
4 | task: m_mmlu_ca
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_da.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: da
3 | include: _default_yaml
4 | task: m_mmlu_da
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_de.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: de
3 | include: _default_yaml
4 | task: m_mmlu_de
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_en.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: en
3 | include: _default_yaml
4 | task: m_mmlu_en
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_es.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: es
3 | include: _default_yaml
4 | task: m_mmlu_es
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_eu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: eu
3 | include: _default_yaml
4 | task: m_mmlu_eu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_fr.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: fr
3 | include: _default_yaml
4 | task: m_mmlu_fr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_gu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gu
3 | include: _default_yaml
4 | task: m_mmlu_gu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hi.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: hi
3 | include: _default_yaml
4 | task: m_mmlu_hi
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hr.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: hr
3 | include: _default_yaml
4 | task: m_mmlu_hr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: hu
3 | include: _default_yaml
4 | task: m_mmlu_hu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_hy.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: hy
3 | include: _default_yaml
4 | task: m_mmlu_hy
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_id.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: id
3 | include: _default_yaml
4 | task: m_mmlu_id
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_is.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: is
3 | include: _default_yaml
4 | task: m_mmlu_is
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_it.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: it
3 | include: _default_yaml
4 | task: m_mmlu_it
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_kn.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: kn
3 | include: _default_yaml
4 | task: m_mmlu_kn
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ml.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ml
3 | include: _default_yaml
4 | task: m_mmlu_ml
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_mr.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: mr
3 | include: _default_yaml
4 | task: m_mmlu_mr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nb.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: nb
3 | include: _default_yaml
4 | task: m_mmlu_nb
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ne.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ne
3 | include: _default_yaml
4 | task: m_mmlu_ne
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_nl.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: nl
3 | include: _default_yaml
4 | task: m_mmlu_nl
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_pt.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: pt
3 | include: _default_yaml
4 | task: m_mmlu_pt
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ro.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ro
3 | include: _default_yaml
4 | task: m_mmlu_ro
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ru.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ru
3 | include: _default_yaml
4 | task: m_mmlu_ru
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sk.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: sk
3 | include: _default_yaml
4 | task: m_mmlu_sk
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sr.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: sr
3 | include: _default_yaml
4 | task: m_mmlu_sr
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_sv.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: sv
3 | include: _default_yaml
4 | task: m_mmlu_sv
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_ta.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ta
3 | include: _default_yaml
4 | task: m_mmlu_ta
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_te.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: te
3 | include: _default_yaml
4 | task: m_mmlu_te
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_uk.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: uk
3 | include: _default_yaml
4 | task: m_mmlu_uk
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_vi.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: vi
3 | include: _default_yaml
4 | task: m_mmlu_vi
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/okapi/mmlu_multilingual/m_mmlu_zh.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: zh
3 | include: _default_yaml
4 | task: m_mmlu_zh
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EigenAttn
2 | 
3 | Official repository for "Eigen Attention: Attention in Low-Rank Space for KV Cache Compression".
4 | 
5 | Code to be added soon...
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/complex_NP_island.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: complex_NP_island
3 | include: _template_yaml
4 | task: blimp_complex_NP_island
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/ellipsis_n_bar_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: ellipsis_n_bar_1
3 | include: _template_yaml
4 | task: blimp_ellipsis_n_bar_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/ellipsis_n_bar_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: ellipsis_n_bar_2
3 | include: _template_yaml
4 | task: blimp_ellipsis_n_bar_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_vs_that_no_gap.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_vs_that_no_gap
3 | include: _template_yaml
4 | task: blimp_wh_vs_that_no_gap
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_computer_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Computer-Science
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_computer_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_food_processing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Food-Processing
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_food_processing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemistry.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: chemistry
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_chemistry
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_economics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: economics
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_economics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_education.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: education
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_education
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_geomatics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: geomatics
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_geomatics
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_marketing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: marketing
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_marketing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_taxation.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: taxation
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_taxation
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/polemo2/polemo2_out.yaml:
--------------------------------------------------------------------------------
1 | include: polemo2_in.yaml
2 | task: polemo2_out
3 | dataset_path: allegro/klej-polemo2-out
4 | dataset_name: klej-polemo2-out
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_case_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_case_1
3 | include: _template_yaml
4 | task: blimp_principle_A_case_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_case_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_case_2
3 | include: _template_yaml
4 | task: blimp_principle_A_case_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/tough_vs_raising_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: tough_vs_raising_1
3 | include: _template_yaml
4 | task: blimp_tough_vs_raising_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/tough_vs_raising_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: tough_vs_raising_2
3 | include: _template_yaml
4 | task: blimp_tough_vs_raising_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_ejauxiliar.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_ejauxiliar
3 | include: eus_exams_es
4 | task: eus_exams_es_ejauxiliar
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_ejtecnico.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_ejtecnico
3 | include: eus_exams_es
4 | task: eus_exams_es_ejtecnico
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opebilbao.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opebilbao
3 | include: eus_exams_es
4 | task: eus_exams_es_opebilbao
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehuaux.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehuaux
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehuaux
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntza.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_ejlaguntza
3 | include: eus_exams_eu
4 | task: eus_exams_eu_ejlaguntza
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_main_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_civil_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Civil-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_civil_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_energy_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Energy-Management
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_energy_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_accounting.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: accounting
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_accounting
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: management
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_psychology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: psychology
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_psychology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_real_estate.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: real_estate
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_real_estate
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_chemical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: chemical_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_chemical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: industrial_engineer
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_industrial_engineer
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: maritime_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_maritime_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_acm_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "acm_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_acm_Arab"
4 | "test_split": "acm_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_afr_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "afr_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_afr_Latn"
4 | "test_split": "afr_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_als_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "als_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_als_Latn"
4 | "test_split": "als_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_amh_Ethi.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "amh_Ethi"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_amh_Ethi"
4 | "test_split": "amh_Ethi"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_apc_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "apc_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_apc_Arab"
4 | "test_split": "apc_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_arb_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "arb_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_arb_Arab"
4 | "test_split": "arb_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_arb_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "arb_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_arb_Latn"
4 | "test_split": "arb_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ars_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ars_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ars_Arab"
4 | "test_split": "ars_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ary_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ary_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ary_Arab"
4 | "test_split": "ary_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_arz_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "arz_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_arz_Arab"
4 | "test_split": "arz_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_asm_Beng.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "asm_Beng"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_asm_Beng"
4 | "test_split": "asm_Beng"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_azj_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "azj_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_azj_Latn"
4 | "test_split": "azj_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_bam_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "bam_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_bam_Latn"
4 | "test_split": "bam_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ben_Beng.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ben_Beng"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ben_Beng"
4 | "test_split": "ben_Beng"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ben_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ben_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ben_Latn"
4 | "test_split": "ben_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_bod_Tibt.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "bod_Tibt"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_bod_Tibt"
4 | "test_split": "bod_Tibt"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_bul_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "bul_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_bul_Cyrl"
4 | "test_split": "bul_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_cat_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "cat_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_cat_Latn"
4 | "test_split": "cat_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ceb_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ceb_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ceb_Latn"
4 | "test_split": "ceb_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ces_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ces_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ces_Latn"
4 | "test_split": "ces_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ckb_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ckb_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ckb_Arab"
4 | "test_split": "ckb_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_dan_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "dan_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_dan_Latn"
4 | "test_split": "dan_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_deu_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "deu_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_deu_Latn"
4 | "test_split": "deu_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ell_Grek.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ell_Grek"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ell_Grek"
4 | "test_split": "ell_Grek"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_eng_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "eng_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_eng_Latn"
4 | "test_split": "eng_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_est_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "est_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_est_Latn"
4 | "test_split": "est_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_eus_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "eus_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_eus_Latn"
4 | "test_split": "eus_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_fin_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "fin_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_fin_Latn"
4 | "test_split": "fin_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_fra_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "fra_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_fra_Latn"
4 | "test_split": "fra_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_fuv_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "fuv_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_fuv_Latn"
4 | "test_split": "fuv_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_gaz_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "gaz_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_gaz_Latn"
4 | "test_split": "gaz_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_grn_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "grn_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_grn_Latn"
4 | "test_split": "grn_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_guj_Gujr.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "guj_Gujr"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_guj_Gujr"
4 | "test_split": "guj_Gujr"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hat_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hat_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hat_Latn"
4 | "test_split": "hat_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hau_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hau_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hau_Latn"
4 | "test_split": "hau_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_heb_Hebr.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "heb_Hebr"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_heb_Hebr"
4 | "test_split": "heb_Hebr"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hin_Deva.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hin_Deva"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hin_Deva"
4 | "test_split": "hin_Deva"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hin_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hin_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hin_Latn"
4 | "test_split": "hin_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hrv_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hrv_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hrv_Latn"
4 | "test_split": "hrv_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hun_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hun_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hun_Latn"
4 | "test_split": "hun_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_hye_Armn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "hye_Armn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_hye_Armn"
4 | "test_split": "hye_Armn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ibo_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ibo_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ibo_Latn"
4 | "test_split": "ibo_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ilo_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ilo_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ilo_Latn"
4 | "test_split": "ilo_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ind_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ind_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ind_Latn"
4 | "test_split": "ind_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_isl_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "isl_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_isl_Latn"
4 | "test_split": "isl_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ita_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ita_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ita_Latn"
4 | "test_split": "ita_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_jav_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "jav_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_jav_Latn"
4 | "test_split": "jav_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_jpn_Jpan.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "jpn_Jpan"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_jpn_Jpan"
4 | "test_split": "jpn_Jpan"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kac_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kac_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kac_Latn"
4 | "test_split": "kac_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kan_Knda.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kan_Knda"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kan_Knda"
4 | "test_split": "kan_Knda"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kat_Geor.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kat_Geor"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kat_Geor"
4 | "test_split": "kat_Geor"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kaz_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kaz_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kaz_Cyrl"
4 | "test_split": "kaz_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kea_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kea_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kea_Latn"
4 | "test_split": "kea_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_khk_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "khk_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_khk_Cyrl"
4 | "test_split": "khk_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_khm_Khmr.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "khm_Khmr"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_khm_Khmr"
4 | "test_split": "khm_Khmr"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kin_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kin_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kin_Latn"
4 | "test_split": "kin_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kir_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kir_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kir_Cyrl"
4 | "test_split": "kir_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_kor_Hang.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "kor_Hang"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_kor_Hang"
4 | "test_split": "kor_Hang"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_lao_Laoo.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "lao_Laoo"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_lao_Laoo"
4 | "test_split": "lao_Laoo"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_lin_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "lin_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_lin_Latn"
4 | "test_split": "lin_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_lit_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "lit_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_lit_Latn"
4 | "test_split": "lit_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_lug_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "lug_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_lug_Latn"
4 | "test_split": "lug_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_luo_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "luo_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_luo_Latn"
4 | "test_split": "luo_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_lvs_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "lvs_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_lvs_Latn"
4 | "test_split": "lvs_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mal_Mlym.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mal_Mlym"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mal_Mlym"
4 | "test_split": "mal_Mlym"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mar_Deva.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mar_Deva"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mar_Deva"
4 | "test_split": "mar_Deva"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mkd_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mkd_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mkd_Cyrl"
4 | "test_split": "mkd_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mlt_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mlt_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mlt_Latn"
4 | "test_split": "mlt_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mri_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mri_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mri_Latn"
4 | "test_split": "mri_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_mya_Mymr.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "mya_Mymr"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_mya_Mymr"
4 | "test_split": "mya_Mymr"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_nld_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "nld_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_nld_Latn"
4 | "test_split": "nld_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_nob_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "nob_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_nob_Latn"
4 | "test_split": "nob_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_npi_Deva.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "npi_Deva"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_npi_Deva"
4 | "test_split": "npi_Deva"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_npi_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "npi_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_npi_Latn"
4 | "test_split": "npi_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_nso_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "nso_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_nso_Latn"
4 | "test_split": "nso_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_nya_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "nya_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_nya_Latn"
4 | "test_split": "nya_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ory_Orya.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ory_Orya"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ory_Orya"
4 | "test_split": "ory_Orya"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_pan_Guru.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "pan_Guru"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_pan_Guru"
4 | "test_split": "pan_Guru"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_pbt_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "pbt_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_pbt_Arab"
4 | "test_split": "pbt_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_pes_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "pes_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_pes_Arab"
4 | "test_split": "pes_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_plt_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "plt_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_plt_Latn"
4 | "test_split": "plt_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_pol_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "pol_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_pol_Latn"
4 | "test_split": "pol_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_por_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "por_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_por_Latn"
4 | "test_split": "por_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ron_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ron_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ron_Latn"
4 | "test_split": "ron_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_rus_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "rus_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_rus_Cyrl"
4 | "test_split": "rus_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_shn_Mymr.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "shn_Mymr"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_shn_Mymr"
4 | "test_split": "shn_Mymr"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_sin_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "sin_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_sin_Latn"
4 | "test_split": "sin_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_sin_Sinh.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "sin_Sinh"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_sin_Sinh"
4 | "test_split": "sin_Sinh"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_slk_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "slk_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_slk_Latn"
4 | "test_split": "slk_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_slv_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "slv_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_slv_Latn"
4 | "test_split": "slv_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_sna_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "sna_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_sna_Latn"
4 | "test_split": "sna_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_snd_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "snd_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_snd_Arab"
4 | "test_split": "snd_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_som_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "som_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_som_Latn"
4 | "test_split": "som_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_sot_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "sot_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_sot_Latn"
4 | "test_split": "sot_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_spa_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "spa_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_spa_Latn"
4 | "test_split": "spa_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_srp_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "srp_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_srp_Cyrl"
4 | "test_split": "srp_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ssw_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ssw_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ssw_Latn"
4 | "test_split": "ssw_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_sun_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "sun_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_sun_Latn"
4 | "test_split": "sun_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_swe_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "swe_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_swe_Latn"
4 | "test_split": "swe_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_swh_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "swh_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_swh_Latn"
4 | "test_split": "swh_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tam_Taml.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tam_Taml"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tam_Taml"
4 | "test_split": "tam_Taml"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tel_Telu.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tel_Telu"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tel_Telu"
4 | "test_split": "tel_Telu"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tgk_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tgk_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tgk_Cyrl"
4 | "test_split": "tgk_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tgl_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tgl_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tgl_Latn"
4 | "test_split": "tgl_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tha_Thai.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tha_Thai"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tha_Thai"
4 | "test_split": "tha_Thai"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tir_Ethi.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tir_Ethi"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tir_Ethi"
4 | "test_split": "tir_Ethi"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tsn_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tsn_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tsn_Latn"
4 | "test_split": "tsn_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tso_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tso_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tso_Latn"
4 | "test_split": "tso_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_tur_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "tur_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_tur_Latn"
4 | "test_split": "tur_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_ukr_Cyrl.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "ukr_Cyrl"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_ukr_Cyrl"
4 | "test_split": "ukr_Cyrl"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_urd_Arab.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "urd_Arab"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_urd_Arab"
4 | "test_split": "urd_Arab"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_urd_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "urd_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_urd_Latn"
4 | "test_split": "urd_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_uzn_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "uzn_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_uzn_Latn"
4 | "test_split": "uzn_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_vie_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "vie_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_vie_Latn"
4 | "test_split": "vie_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_war_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "war_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_war_Latn"
4 | "test_split": "war_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_wol_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "wol_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_wol_Latn"
4 | "test_split": "wol_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_xho_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "xho_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_xho_Latn"
4 | "test_split": "xho_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_yor_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "yor_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_yor_Latn"
4 | "test_split": "yor_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_zho_Hans.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "zho_Hans"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_zho_Hans"
4 | "test_split": "zho_Hans"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_zho_Hant.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "zho_Hant"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_zho_Hant"
4 | "test_split": "zho_Hant"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_zsm_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "zsm_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_zsm_Latn"
4 | "test_split": "zsm_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/belebele/belebele_zul_Latn.yaml:
--------------------------------------------------------------------------------
1 | "fewshot_split": "zul_Latn"
2 | "include": "_default_template_yaml"
3 | "task": "belebele_zul_Latn"
4 | "test_split": "zul_Latn"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_domain_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_domain_1
3 | include: _template_yaml
4 | task: blimp_principle_A_domain_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_domain_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_domain_2
3 | include: _template_yaml
4 | task: blimp_principle_A_domain_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_domain_3.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_domain_3
3 | include: _template_yaml
4 | task: blimp_principle_A_domain_3
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_vs_that_with_gap.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_vs_that_with_gap
3 | include: _template_yaml
4 | task: blimp_wh_vs_that_with_gap
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_ejsubalterno.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_ejsubalterno
3 | include: eus_exams_es
4 | task: eus_exams_es_ejsubalterno
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehuadmin.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehuadmin
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehuadmin
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehubiblio.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehubiblio
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehubiblio
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakiaux.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakiaux
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakiaux
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakienf.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakienf
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakienf
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza1c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza1c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza1c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza2c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza2c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza2c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza3c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza3c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza3c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza4c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza4c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza4c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza5c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza5c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza5c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza6c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza6c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza6c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza7c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza7c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza7c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza8c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza8c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza8c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_osakidetza9c.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_osakidetza9c
3 | include: eus_exams_es
4 | task: eus_exams_es_osakidetza9c
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_ejteknikari.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_ejteknikari
3 | include: eus_exams_eu
4 | task: eus_exams_eu_ejteknikari
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opebilbaoeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opebilbaoeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opebilbaoeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehuauxeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehuauxeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehuauxeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza1e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza1e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza1e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza2e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza2e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza2e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza3e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza3e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza3e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza5e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza5e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza5e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza6e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza6e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza6e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_osakidetza7e.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_osakidetza7e
3 | include: eus_exams_eu
4 | task: eus_exams_eu_osakidetza7e
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_industrial_engineer.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Industrial-Engineer
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_industrial_engineer
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_construction.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: construction
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_construction
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_criminal_law.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: criminal_law
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_criminal_law
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_agricultural_sciences.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: agricultural_sciences
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_agricultural_sciences
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: electrical_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_electrical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_environmental_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: environmental_science
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_environmental_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: information_technology
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_information_technology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: materials_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_materials_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: mechanical_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_mechanical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: nondestructive_testing
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_nondestructive_testing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/openness.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: openness
3 | include: _template_yaml
4 | task: persona_openness
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/jec-qa-ca.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_jec_qa_ca
6 | dataset_path: hails/agieval-jec-qa-ca
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/jec-qa-kd.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_jec_qa_kd
6 | dataset_path: hails/agieval-jec-qa-kd
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/logiqa-zh.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_logiqa_zh
6 | dataset_path: hails/agieval-logiqa-zh
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/animate_subject_trans.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: animate_subject_trans
3 | include: _template_yaml
4 | task: blimp_animate_subject_trans
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_c_command.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_c_command
3 | include: _template_yaml
4 | task: blimp_principle_A_c_command
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehuderecho.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehuderecho
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehuderecho
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnico.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehutecnico
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehutecnico
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakiadmin.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakiadmin
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakiadmin
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_ejlaguntzaile.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_ejlaguntzaile
3 | include: eus_exams_eu
4 | task: eus_exams_eu_ejlaguntzaile
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehuadmineu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehuadmineu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehuadmineu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakiauxeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakiauxeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakienfeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakienfeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakienfeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_diamond_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_main_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_agricultural_sciences.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Agricultural-Sciences
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_agricultural_sciences
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_chemical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Chemical-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_chemical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_environmental_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Environmental-Science
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_environmental_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_maritime_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Maritime-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_maritime_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_materials_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Materials-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_materials_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_korean_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: korean_history
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_korean_history
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_public_safety.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: public_safety
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_public_safety
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_social_welfare.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: social_welfare
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_social_welfare
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_electronics_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: electronics_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_electronics_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_refrigerating_machinery.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: refrigerating_machinery
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_refrigerating_machinery
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/narcissism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: narcissism
3 | include: _template_yaml
4 | task: persona_narcissism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/tmmluplus/default/tmmluplus.yaml:
--------------------------------------------------------------------------------
1 | group: tmmluplus
2 | task:
3 | - tmmluplus_other
4 | - tmmluplus_social_sciences
5 | - tmmluplus_humanities
6 | - tmmluplus_STEM
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_2da.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_2da
3 | dataset_name: arithmetic_2da
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_2dm.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_2dm
3 | dataset_name: arithmetic_2dm
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_2ds.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_2ds
3 | dataset_name: arithmetic_2ds
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_3da.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_3da
3 | dataset_name: arithmetic_3da
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_3ds.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_3ds
3 | dataset_name: arithmetic_3ds
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_4da.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_4da
3 | dataset_name: arithmetic_4da
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_4ds.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_4ds
3 | dataset_name: arithmetic_4ds
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_5da.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_5da
3 | dataset_name: arithmetic_5da
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/arithmetic/arithmetic_5ds.yaml:
--------------------------------------------------------------------------------
1 | include: arithmetic_1dc.yaml
2 | task: arithmetic_5ds
3 | dataset_name: arithmetic_5ds
4 | dataset_kwargs:
5 |   trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/animate_subject_passive.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: animate_subject_passive
3 | include: _template_yaml
4 | task: blimp_animate_subject_passive
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_questions_object_gap.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_questions_object_gap
3 | include: _template_yaml
4 | task: blimp_wh_questions_object_gap
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "law"
2 | "description": "以下是中国关于法学的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_law"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehutecnicob.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehutecnicob
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehutecnicob
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakiauxenf.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakiauxenf
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakiauxenf
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakicelador.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakicelador
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakicelador
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakitecnico.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakitecnico
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakitecnico
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakivarios.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakivarios
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakivarios
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_ejadministrari.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_ejadministrari
3 | include: eus_exams_eu
4 | task: eus_exams_eu_ejadministrari
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehubiblioeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehubiblioeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehubiblioeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehuderechoeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehuderechoeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehuderechoeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehutecnicoeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehutecnicoeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehutecnicoeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiadmineu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakiadmineu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakiadmineu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_extended_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Electrical-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_electrical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_information_technology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Information-Technology
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_information_technology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_mechanical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Mechanical-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_mechanical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_nondestructive_testing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Nondestructive-Testing
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_nondestructive_testing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_computer_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: computer_science
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_computer_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_food_processing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: food_processing
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_food_processing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: counting_and_probability
3 | task: minerva_math_counting_and_prob
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml:
--------------------------------------------------------------------------------
1 | include: minerva_math_algebra.yaml
2 | dataset_name: intermediate_algebra
3 | task: minerva_math_intermediate_algebra
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/neuroticism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: neuroticism
3 | include: _template_yaml
4 | task: persona_neuroticism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/psychopathy.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: psychopathy
3 | include: _template_yaml
4 | task: persona_psychopathy
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/risk-averse.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: risk-averse
3 | include: _template_yaml
4 | task: persona_risk-averse
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-mathqa.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_mathqa
6 | dataset_path: hails/agieval-gaokao-mathqa
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/sat-en.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_sat_en
7 | dataset_path: hails/agieval-sat-en
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/gem.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: gem_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_gem_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/gem.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: gem_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_gem_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/anaphor_gender_agreement.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: anaphor_gender_agreement
3 | include: _template_yaml
4 | task: blimp_anaphor_gender_agreement
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/anaphor_number_agreement.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: anaphor_number_agreement
3 | include: _template_yaml
4 | task: blimp_anaphor_number_agreement
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/only_npi_licensor_present.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: only_npi_licensor_present
3 | include: _template_yaml
4 | task: blimp_only_npi_licensor_present
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/sentential_subject_island.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: sentential_subject_island
3 | include: _template_yaml
4 | task: blimp_sentential_subject_island
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: superlative_quantifiers_1
3 | include: _template_yaml
4 | task: blimp_superlative_quantifiers_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: superlative_quantifiers_2
3 | include: _template_yaml
4 | task: blimp_superlative_quantifiers_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_questions_subject_gap.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_questions_subject_gap
3 | include: _template_yaml
4 | task: blimp_wh_questions_subject_gap
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logic"
2 | "description": "以下是中国关于逻辑学的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_logic"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_arts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "arts"
2 | "description": "以下是关于艺术学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_arts"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_ejadministrativo.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_ejadministrativo
3 | include: eus_exams_es
4 | task: eus_exams_es_ejadministrativo
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehueconomicas.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehueconomicas
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehueconomicas
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehusubalterno.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehusubalterno
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehusubalterno
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakijuridico.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakijuridico
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakijuridico
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeosakioperario.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeosakioperario
3 | include: eus_exams_es
4 | task: eus_exams_es_opeosakioperario
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehuteknikarib.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehuteknikarib
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehuteknikarib
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opegasteizkoudala.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opegasteizkoudala
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opegasteizkoudala
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiauxenfeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakiauxenfeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakiauxenfeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakiceladoreu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakiceladoreu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakiceladoreu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakitecnicoeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakitecnicoeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakitecnicoeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakivarioseu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakivarioseu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakivarioseu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_main_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_diamond_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_extended_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_electronics_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Electronics-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_electronics_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_refrigerating_machinery.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Refrigerating-Machinery
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_refrigerating_machinery
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_civil_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: civil_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_civil_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_energy_management.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: energy_management
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_energy_management
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/agreeableness.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: agreeableness
3 | include: _template_yaml
4 | task: persona_agreeableness
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/extraversion.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: extraversion
3 | include: _template_yaml
4 | task: persona_extraversion
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/no-shut-down.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: no-shut-down
3 | include: _template_yaml
4 | task: persona_no-shut-down
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/risk-neutral.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: risk-neutral
3 | include: _template_yaml
4 | task: persona_risk-neutral
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/risk-seeking.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: risk-seeking
3 | include: _template_yaml
4 | task: persona_risk-seeking
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-biology.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_biology
6 | dataset_path: hails/agieval-gaokao-biology
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-chinese.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_chinese
6 | dataset_path: hails/agieval-gaokao-chinese
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-history.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_history
6 | dataset_path: hails/agieval-gaokao-history
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-physics.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_physics
6 | dataset_path: hails/agieval-gaokao-physics
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/lsat-ar.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_lsat_ar
7 | dataset_path: hails/agieval-lsat-ar
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/lsat-lr.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_lsat_lr
7 | dataset_path: hails/agieval-lsat-lr
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/lsat-rc.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_lsat_rc
7 | dataset_path: hails/agieval-lsat-rc
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/sat-math.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_sat_math
7 | dataset_path: hails/agieval-sat-math
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/color.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: color_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_color_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/snarks.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: snarks_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_snarks_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/tense.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: tense_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_tense_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/color.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: color_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_color_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/tense.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: tense_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_tense_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/determiner_noun_agreement_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: determiner_noun_agreement_1
3 | include: _template_yaml
4 | task: blimp_determiner_noun_agreement_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/determiner_noun_agreement_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: determiner_noun_agreement_2
3 | include: _template_yaml
4 | task: blimp_determiner_noun_agreement_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/expletive_it_object_raising.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: expletive_it_object_raising
3 | include: _template_yaml
4 | task: blimp_expletive_it_object_raising
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/principle_A_reconstruction.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: principle_A_reconstruction
3 | include: _template_yaml
4 | task: blimp_principle_A_reconstruction
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "以下是关于解剖学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_anatomy"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_logical.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical"
2 | "description": "以下是关于逻辑学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_logical"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_age.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_age
3 | dataset_name: english
4 | process_docs: !function utils.filter_age
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_age.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_age
3 | dataset_name: french
4 | process_docs: !function utils.filter_age
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehueconomicaseu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehueconomicaseu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehueconomicaseu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehusubalternoeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehusubalternoeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehusubalternoeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeosakioperarioeu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeosakioperarioeu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeosakioperarioeu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_diamond_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_main_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_industrial_engineer.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: industrial_engineer
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_industrial_engineer
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/has-disability.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: has-disability
3 | include: _template_yaml
4 | task: persona_has-disability
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/interest-in-art.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: interest-in-art
3 | include: _template_yaml
4 | task: persona_interest-in-art
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/no-goal-change.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: no-goal-change
3 | include: _template_yaml
4 | task: persona_no-goal-change
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aexams/aexams_Science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "Science"
2 | "description": "قم بالإجابة على مايلي في مجال العلوم \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aexams_Science"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-chemistry.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_chemistry
6 | dataset_path: hails/agieval-gaokao-chemistry
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/gaokao-geography.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_cn
5 | task: agieval_gaokao_geography
6 | dataset_path: hails/agieval-gaokao-geography
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/agieval/logiqa-en.yaml:
--------------------------------------------------------------------------------
1 | include: aqua-rat.yaml
2 | group:
3 |   - agieval
4 |   - agieval_nous
5 |   - agieval_en
6 | task: agieval_logiqa_en
7 | dataset_path: hails/agieval-logiqa-en
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "قم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_virology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/kannada.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: kannada_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_kannada_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/physics.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: physics_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_physics_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/winowhy.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: winowhy_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_winowhy_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/snarks.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: snarks_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_snarks_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_agronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "agronomy"
2 | "description": "以下是关于农学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_agronomy"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "genetics"
2 | "description": "以下是关于遗传学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_genetics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "以下是关于病毒学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_virology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_autre.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_autre
3 | dataset_name: english
4 | process_docs: !function utils.filter_autre
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_autre.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_autre
3 | dataset_name: french
4 | process_docs: !function utils.filter_autre
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_gender.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_gender
3 | dataset_name: french
4 | process_docs: !function utils.filter_gender
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeehuempresariales.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeehuempresariales
3 | include: eus_exams_es
4 | task: eus_exams_es_opeehuempresariales
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_extended_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_agricultural_sciences.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: agricultural_sciences
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_agricultural_sciences
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_chemical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: chemical_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_chemical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_environmental_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: environmental_science
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_environmental_science
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_maritime_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: maritime_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_maritime_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_materials_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: materials_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_materials_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: gas_technology_and_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_gas_technology_and_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/anti-immigration.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: anti-immigration
3 | include: _template_yaml
4 | task: persona_anti-immigration
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/interest-in-math.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: interest-in-math
3 | include: _template_yaml
4 | task: persona_interest-in-math
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/machiavellianism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: machiavellianism
3 | include: _template_yaml
4 | task: persona_machiavellianism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/self-replication.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: self-replication
3 | include: _template_yaml
4 | task: persona_self-replication
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/xnli_eu/xnli_eu_native.yaml:
--------------------------------------------------------------------------------
1 | include: xnli_eu.yaml
2 | group: xnli_eu_mt_native
3 | task: xnli_eu_native
4 | training_split: null
5 | validation_split: null
6 | dataset_name: eu_native
7 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aexams/aexams_Physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "Physics"
2 | "description": "قم بالإجابة على مايلي في مجال الفيزياء \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aexams_Physics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "قم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_management"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "قم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_marketing"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "قم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_nutrition"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/codenames.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: codenames_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_codenames_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/crass_ai.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: crass_ai_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_crass_ai_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/disfl_qa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: disfl_qa_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_disfl_qa_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/multiemo.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: multiemo_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_multiemo_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/navigate.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: navigate_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_navigate_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/operators.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: operators_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_operators_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/rephrase.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: rephrase_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_rephrase_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/timedial.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: timedial_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_timedial_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: crass_ai_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_crass_ai_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: disfl_qa_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_disfl_qa_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/kannada.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: kannada_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_kannada_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/multiemo.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: multiemo_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_multiemo_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/navigate.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: navigate_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_navigate_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/physics.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: physics_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_physics_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/rephrase.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: rephrase_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_rephrase_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/timedial.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: timedial_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_timedial_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/winowhy.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: winowhy_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_winowhy_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: sentential_negation_npi_scope
3 | include: _template_yaml
4 | task: blimp_sentential_negation_npi_scope
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_marxism.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marxism"
2 | "description": "以下是中国关于马克思主义基本原理的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_marxism"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_physician.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "physician"
2 | "description": "以下是中国关于医师资格的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_physician"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "以下是关于天文学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_astronomy"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_economics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "economics"
2 | "description": "以下是关于经济学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_economics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_education.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "education"
2 | "description": "以下是关于教育学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_education"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_ethnology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "ethnology"
2 | "description": "以下是关于民族学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_ethnology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_journalism.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "journalism"
2 | "description": "以下是关于新闻学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_journalism"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "以下是关于管理学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_management"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "以下是关于市场营销的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_marketing"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "以下是关于营养学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_nutrition"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "以下是关于哲学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_philosophy"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "以下是关于社会学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_sociology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_gender.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_gender
3 | dataset_name: english
4 | process_docs: !function utils.filter_gender
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_es_opeayuntamientovitoria.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: es_opeayuntamientovitoria
3 | include: eus_exams_es
4 | task: eus_exams_es_opeayuntamientovitoria
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/eus_exams/eus_exams_eu_opeehuempresarialeseu.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: eu_opeehuempresarialeseu
3 | include: eus_exams_eu
4 | task: eus_exams_eu_opeehuempresarialeseu
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_diamond_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: electrical_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_electrical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_information_technology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: information_technology
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_information_technology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_mechanical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: mechanical_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_mechanical_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_nondestructive_testing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: nondestructive_testing
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_nondestructive_testing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: interior_architecture_and_design
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_interior_architecture_and_design
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: machine_design_and_manufacturing
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_machine_design_and_manufacturing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: political_science_and_sociology
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_political_science_and_sociology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/anti-LGBTQ-rights.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: anti-LGBTQ-rights
3 | include: _template_yaml
4 | task: persona_anti-LGBTQ-rights
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/conscientiousness.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: conscientiousness
3 | include: _template_yaml
4 | task: persona_conscientiousness
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/ends-justify-means.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: ends-justify-means
3 | include: _template_yaml
4 | task: persona_ends-justify-means
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/high-discount-rate.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: high-discount-rate
3 | include: _template_yaml
4 | task: persona_high-discount-rate
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/interest-in-music.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: interest-in-music
3 | include: _template_yaml
4 | task: persona_interest-in-music
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: interest-in-sports
3 | include: _template_yaml
4 | task: persona_interest-in-sports
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/low-discount-rate.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: low-discount-rate
3 | include: _template_yaml
4 | task: persona_low-discount-rate
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/stands-its-ground.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: stands-its-ground
3 | include: _template_yaml
4 | task: persona_stands-its-ground
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aexams/aexams_Biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "Biology"
2 | "description": "قم بالإجابة على مايلي في مجال العلوم الحيوية\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aexams_Biology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aexams/aexams_Social.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "Social"
2 | "description": "قم بالإجابة على مايلي في مجال العلوم الإجتماعية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aexams_Social"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_global_facts"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_human_aging"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "فم بعملية التقييم في مجال العلوم الإجتماعية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_sociology"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/arithmetic.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: arithmetic_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_arithmetic_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/cryptonite.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: cryptonite_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_cryptonite_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/emoji_movie.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: emoji_movie_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_emoji_movie_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/hyperbaton.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: hyperbaton_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_hyperbaton_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/kanji_ascii.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: kanji_ascii_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_kanji_ascii_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/mnist_ascii.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: mnist_ascii_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_mnist_ascii_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/odd_one_out.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: odd_one_out_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_odd_one_out_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/parsinlu_qa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: parsinlu_qa_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_parsinlu_qa_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/qa_wikidata.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: qa_wikidata_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_qa_wikidata_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/ruin_names.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: ruin_names_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_ruin_names_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/social_iqa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: social_iqa_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_social_iqa_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/strategyqa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: strategyqa_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_strategyqa_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: arithmetic_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_arithmetic_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/codenames.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: codenames_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_codenames_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: cryptonite_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_cryptonite_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/hyperbaton.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: hyperbaton_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_hyperbaton_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/operators.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: operators_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_operators_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/ruin_names.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: ruin_names_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_ruin_names_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/social_iqa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: social_iqa_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_social_iqa_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/strategyqa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: strategyqa_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_strategyqa_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/existential_there_object_raising.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: existential_there_object_raising
3 | include: _template_yaml
4 | task: blimp_existential_there_object_raising
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/existential_there_quantifiers_1.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: existential_there_quantifiers_1
3 | include: _template_yaml
4 | task: blimp_existential_there_quantifiers_1
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/existential_there_quantifiers_2.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: existential_there_quantifiers_2
3 | include: _template_yaml
4 | task: blimp_existential_there_quantifiers_2
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/irregular_past_participle_verbs.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: irregular_past_participle_verbs
3 | include: _template_yaml
4 | task: blimp_irregular_past_participle_verbs
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/left_branch_island_echo_question.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: left_branch_island_echo_question
3 | include: _template_yaml
4 | task: blimp_left_branch_island_echo_question
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_vs_that_no_gap_long_distance
3 | include: _template_yaml
4 | task: blimp_wh_vs_that_no_gap_long_distance
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_accountant.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "accountant"
2 | "description": "以下是中国关于注册会计师的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_accountant"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_art_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "art_studies"
2 | "description": "以下是中国关于艺术学的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_art_studies"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_college_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_law"
2 | "description": "以下是关于大学法律的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_college_law"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_religion.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_religion
3 | dataset_name: english
4 | process_docs: !function utils.filter_religion
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_religion.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_religion
3 | dataset_name: french
4 | process_docs: !function utils.filter_religion
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_extended_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_main_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_gas_technology_and_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Gas-Technology-and-Engineering
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_gas_technology_and_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_political_science_and_sociology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Political-Science-and-Sociology
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_political_science_and_sociology
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_electronics_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: electronics_engineering
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_electronics_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct_hard/kmmlu_direct_hard_refrigerating_machinery.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: refrigerating_machinery
2 | include: _direct_hard_kmmlu_yaml
3 | task: kmmlu_hard_direct_refrigerating_machinery
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: high-discount-factor
3 | include: _template_yaml
4 | task: persona_high-discount-factor
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/interest-in-science.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: interest-in-science
3 | include: _template_yaml
4 | task: persona_interest-in-science
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/low-discount-factor.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: low-discount-factor
3 | include: _template_yaml
4 | task: persona_low-discount-factor
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/no-power-discomfort.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: no-power-discomfort
3 | include: _template_yaml
4 | task: persona_no-power-discomfort
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/politically-liberal.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: politically-liberal
3 | include: _template_yaml
4 | task: persona_politically-liberal
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/resource-acquisition.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: resource-acquisition
3 | include: _template_yaml
4 | task: persona_resource-acquisition
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/subscribes-to-Islam.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: subscribes-to-Islam
3 | include: _template_yaml
4 | task: persona_subscribes-to-Islam
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/subscribes-to-Taoism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: subscribes-to-Taoism
3 | include: _template_yaml
4 | task: persona_subscribes-to-Taoism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/toxigen/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
def doc_to_target(doc):
    """Return the binary target label for a document as a NumPy int32.

    The label is 1 when the sum of ``doc["toxicity_ai"]`` and
    ``doc["toxicity_human"]`` is strictly greater than 5.5, and 0 otherwise.

    Args:
        doc: Mapping that provides numeric ``"toxicity_ai"`` and
            ``"toxicity_human"`` entries.
            NOTE(review): the scale of these scores is not visible here;
            confirm against the dataset card.

    Returns:
        The thresholded label (0 or 1) cast to ``np.int32``.
    """
    combined_score = doc["toxicity_ai"] + doc["toxicity_human"]
    exceeds_threshold = combined_score > 5.5
    # Keep the round-then-cast sequence so the returned object is the same
    # kind of NumPy value callers already receive.
    rounded_flag = np.round(exceeds_threshold, 0)
    return rounded_flag.astype(np.int32)
8 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aclue/aclue_ancient_medical.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "ancient_medical"
2 | "description": "以下是关于医古文的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aclue_ancient_medical"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_miscellaneous"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_philosophy"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_prehistory"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/anachronisms.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: anachronisms_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_anachronisms_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/fact_checker.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: fact_checker_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_fact_checker_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/few_shot_nlg.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: few_shot_nlg_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_few_shot_nlg_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/implicatures.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: implicatures_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_implicatures_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/logical_args.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: logical_args_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_logical_args_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/matrixshapes.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: matrixshapes_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_matrixshapes_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/riddle_sense.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: riddle_sense_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_riddle_sense_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/suicide_risk.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: suicide_risk_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_suicide_risk_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/topical_chat.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: topical_chat_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_topical_chat_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/word_sorting.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: word_sorting_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_word_sorting_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: emoji_movie_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_emoji_movie_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/kanji_ascii.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: kanji_ascii_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_kanji_ascii_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/mnist_ascii.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: mnist_ascii_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_mnist_ascii_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/odd_one_out.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: odd_one_out_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_odd_one_out_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/parsinlu_qa.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: parsinlu_qa_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_parsinlu_qa_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/qa_wikidata.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: qa_wikidata_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_qa_wikidata_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/existential_there_subject_raising.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: existential_there_subject_raising
3 | include: _template_yaml
4 | task: blimp_existential_there_subject_raising
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/blimp/wh_vs_that_with_gap_long_distance.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: wh_vs_that_with_gap_long_distance
3 | include: _template_yaml
4 | task: blimp_wh_vs_that_with_gap_long_distance
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ceval/ceval-valid_civil_servant.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "civil_servant"
2 | "description": "以下是中国关于公务员的单项选择题，请选出其中的正确答案。\n\n"
3 | "include": "_default_ceval_yaml"
4 | "task": "ceval-valid_civil_servant"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_food_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "food_science"
2 | "description": "以下是关于食品科学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_food_science"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "以下是关于全球事实的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_global_facts"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "以下是关于法理学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_jurisprudence"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/cmmlu/cmmlu_default_world_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_history"
2 | "description": "以下是关于世界历史的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "cmmlu_world_history"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_disability.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_disability
3 | dataset_name: english
4 | process_docs: !function utils.filter_disability
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_english_race_color.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_english_race_color
3 | dataset_name: english
4 | process_docs: !function utils.filter_race_color
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_disability.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_disability
3 | dataset_name: french
4 | process_docs: !function utils.filter_disability
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_nationality.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_nationality
3 | dataset_name: french
4 | process_docs: !function utils.filter_nationality
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_race_color.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_race_color
3 | dataset_name: french
4 | process_docs: !function utils.filter_race_color
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/crows_pairs/crows_pairs_french_socioeconomic.yaml:
--------------------------------------------------------------------------------
1 | include: crows_pairs_english.yaml
2 | task: crows_pairs_french_socioeconomic
3 | dataset_name: french
4 | process_docs: !function utils.filter_socio
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_interior_architecture_and_design.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Interior-Architecture-and-Design
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_interior_architecture_and_design
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/direct/kmmlu_direct_machine_design_and_manufacturing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Machine-Design-and-Manufacturing
2 | include: _direct_kmmlu_yaml
3 | task: kmmlu_direct_machine_design_and_manufacturing
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/kmmlu/hard/kmmlu_hard_railway_and_automotive_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: railway_and_automotive_engineering
2 | include: _hard_kmmlu_yaml
3 | task: kmmlu_hard_railway_and_automotive_engineering
4 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: cognitive-enhancement
3 | include: _template_yaml
4 | task: persona_cognitive-enhancement
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/desire-for-popularity.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: desire-for-popularity
3 | include: _template_yaml
4 | task: persona_desire-for-popularity
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/desire-for-wide-usage.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: desire-for-wide-usage
3 | include: _template_yaml
4 | task: persona_desire-for-wide-usage
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/subscribes-to-Atheism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: subscribes-to-Atheism
3 | include: _template_yaml
4 | task: persona_subscribes-to-Atheism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/model_written_evals/persona/subscribes-to-Judaism.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: subscribes-to-Judaism
3 | include: _template_yaml
4 | task: persona_subscribes-to-Judaism
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aclue/aclue_ancient_phonetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "ancient_phonetics"
2 | "description": "以下是关于古音学的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aclue_ancient_phonetics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aclue/aclue_couplet_prediction.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "couplet_prediction"
2 | "description": "以下是关于对联的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aclue_couplet_prediction"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/aclue/aclue_poetry_appreciate.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "poetry_appreciate"
2 | "description": "以下是关于古诗词曲鉴赏的单项选择题，请直接给出正确答案的选项。\n\n"
3 | "include": "_default_template_yaml"
4 | "task": "aclue_poetry_appreciate"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "فم بعملية التقييم في مجال علوم أخرى \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_business_ethics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "فم بعملية التقييم في مجال العلوم الإجتماعية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_econometrics"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_formal_logic"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/ammlu/ammlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "فم بعملية التقييم في مجال العلوم الانسانية \n\n"
3 | "include": "_default_template_yaml"
4 | "task": "ammlu_jurisprudence"
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/auto_debugging.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: auto_debugging_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_auto_debugging_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/bbq_lite_json.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: bbq_lite_json_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_bbq_lite_json_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/crash_blossom.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: crash_blossom_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_crash_blossom_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/cs_algorithms.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: cs_algorithms_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_cs_algorithms_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/dyck_languages.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: dyck_languages_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_dyck_languages_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/hhh_alignment.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: hhh_alignment_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_hhh_alignment_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/key_value_maps.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: key_value_maps_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_key_value_maps_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/known_unknowns.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: known_unknowns_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_known_unknowns_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/language_games.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: language_games_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_language_games_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/list_functions.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: list_functions_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_list_functions_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/misconceptions.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: misconceptions_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_misconceptions_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/novel_concepts.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: novel_concepts_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_novel_concepts_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/persian_idioms.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: persian_idioms_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_persian_idioms_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/generate_until/social_support.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: social_support_zero_shot
3 | include: ../generate_until_template_yaml
4 | task: bigbench_social_support_generate_until
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: anachronisms_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_anachronisms_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/bbq_lite_json.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: bbq_lite_json_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_bbq_lite_json_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/crash_blossom.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: crash_blossom_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_crash_blossom_multiple_choice
5 | 


--------------------------------------------------------------------------------
/lm_eval/tasks/bigbench/multiple_choice/cs_algorithms.yaml:
--------------------------------------------------------------------------------
1 | # Generated by utils.py
2 | dataset_name: cs_algorithms_zero_shot
3 | include: ../multiple_choice_template_yaml
4 | task: bigbench_cs_algorithms_multiple_choice
5 | 


--------------------------------------------------------------------------------