├── tasks ├── leaderboard_it │ ├── README.md │ ├── __init__.py │ ├── gpqa_it │ │ ├── gpqa_main_zeroshot.yaml │ │ ├── gpqa_diamond_zeroshot.yaml │ │ ├── gpqa_extended_zeroshot.yaml │ │ ├── _leaderboard_gpqa.yaml │ │ ├── _template_yaml │ │ └── utils.py │ ├── musr_it │ │ ├── musr_team_allocation.yaml │ │ ├── musr_murder_mysteries.yaml │ │ ├── musr_object_placements.yaml │ │ ├── _musr.yaml │ │ ├── _template_yaml │ │ └── utils.py │ ├── bbh_it │ │ ├── _fewshot_template_yaml │ │ ├── boolean_expressions.yaml │ │ ├── sports_understanding.yaml │ │ ├── object_counting.yaml │ │ ├── web_of_lies.yaml │ │ ├── navigate.yaml │ │ ├── _leaderboard_bbh.yaml │ │ ├── date_understanding.yaml │ │ ├── movie_recommendation.yaml │ │ ├── disambiguation_qa.yaml │ │ ├── snarks.yaml │ │ ├── geometric_shapes.yaml │ │ ├── logical_deduction_three_objects.yaml │ │ ├── logical_deduction_five_objects.yaml │ │ ├── logical_deduction_seven_objects.yaml │ │ ├── penguins_in_a_table.yaml │ │ ├── tracking_shuffled_objects_three_objects.yaml │ │ └── tracking_shuffled_objects_five_objects.yaml │ ├── mmlu_pro_it │ │ ├── mmlu_pro.yaml │ │ └── utils.py │ ├── math_it │ │ └── math_hard_it.yaml │ ├── leaderboard_it.yaml │ └── ifeval_it │ │ └── ifeval.yaml ├── translations │ ├── piqa │ │ ├── en-x │ │ │ ├── piqa_en-en.yaml │ │ │ └── piqa_en-it.yaml │ │ ├── it-x │ │ │ ├── piqa_it-en.yaml │ │ │ └── piqa_it-it.yaml │ │ └── _piqa_yaml │ ├── sciq │ │ ├── en-x │ │ │ ├── sciq_en-en.yaml │ │ │ ├── sciq_en-it.yaml │ │ │ ├── sciq_with_passages_en-en.yaml │ │ │ └── sciq_with_passages_en-it.yaml │ │ ├── it-x │ │ │ ├── sciq_it-en.yaml │ │ │ ├── sciq_it-it.yaml │ │ │ ├── sciq_with_passages_it-en.yaml │ │ │ └── sciq_with_passages_it-it.yaml │ │ └── _sciq_yaml │ ├── boolq │ │ ├── en-x │ │ │ ├── boolq_en-en.yaml │ │ │ ├── boolq_en-it.yaml │ │ │ ├── boolq_with_passages_en-en.yaml │ │ │ └── boolq_with_passages_en-it.yaml │ │ ├── it-x │ │ │ ├── boolq_it-en.yaml │ │ │ ├── boolq_it-it.yaml │ │ │ ├── boolq_with_passages_it-en.yaml │ │ │ └── 
boolq_with_passages_it-it.yaml │ │ └── _boolq_yaml │ ├── arc_easy │ │ ├── en-x │ │ │ ├── arc_easy_en-en.yaml │ │ │ └── arc_easy_en-it.yaml │ │ ├── it-x │ │ │ ├── arc_easy_it-en.yaml │ │ │ └── arc_easy_it-it.yaml │ │ └── _arc_easy_yaml │ ├── hellaswag │ │ ├── en-x │ │ │ ├── hellaswag_en-en.yaml │ │ │ └── hellaswag_en-it.yaml │ │ ├── it-x │ │ │ ├── hellaswag_it-en.yaml │ │ │ └── hellaswag_it-it.yaml │ │ ├── _hellaswag_yaml │ │ └── utils.py │ ├── winogrande │ │ ├── en-x │ │ │ ├── winogrande_en-en.yaml │ │ │ └── winogrande_en-it.yaml │ │ ├── it-x │ │ │ ├── winogrande_it-en.yaml │ │ │ └── winogrande_it-it.yaml │ │ ├── _winogrande_yaml │ │ └── utils.py │ ├── arc_challenge │ │ ├── en-x │ │ │ ├── arc_challenge_en-en.yaml │ │ │ └── arc_challenge_en-it.yaml │ │ ├── it-x │ │ │ ├── arc_challenge_it-en.yaml │ │ │ └── arc_challenge_it-it.yaml │ │ └── _arc_challenge_yaml │ ├── gsm8k │ │ ├── en-x │ │ │ ├── gsm8k_multichoice_en-en.yaml │ │ │ ├── gsm8k_multichoice_en-it.yaml │ │ │ ├── gsm8k_generate_en-en.yaml │ │ │ └── gsm8k_generate_en-it.yaml │ │ ├── it-x │ │ │ ├── gsm8k_multichoice_it-en.yaml │ │ │ ├── gsm8k_multichoice_it-it.yaml │ │ │ ├── gsm8k_generate_it-en.yaml │ │ │ └── gsm8k_generate_it-it.yaml │ │ ├── _gsm8k_multichoice_yaml │ │ └── _gsm8k_generation_yaml │ ├── truthful_qa │ │ ├── en-x │ │ │ ├── truthful_qa_mc1_en-en.yaml │ │ │ ├── truthful_qa_mc1_en-it.yaml │ │ │ ├── truthful_qa_mc2_en-en.yaml │ │ │ └── truthful_qa_mc2_en-it.yaml │ │ ├── it-x │ │ │ ├── truthful_qa_mc1_it-en.yaml │ │ │ ├── truthful_qa_mc1_it-it.yaml │ │ │ ├── truthful_qa_mc2_it-en.yaml │ │ │ └── truthful_qa_mc2_it-it.yaml │ │ ├── _truthful_qa_mc1_yaml │ │ └── _truthful_qa_mc2_yaml │ ├── mmlu │ │ ├── en-en │ │ │ ├── itabench_mmlu_cloze_anatomy_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_virology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_astronomy_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_marketing_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_nutrition_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_management_en-en.yaml 
│ │ │ ├── itabench_mmlu_cloze_philosophy_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_prehistory_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_sociology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_human_aging_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_miscellaneous_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_econometrics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_global_facts_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_jurisprudence_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_anatomy_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_formal_logic_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_virology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_business_ethics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_biology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_physics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_moral_disputes_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_astronomy_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_marketing_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_nutrition_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_abstract_algebra_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_medicine_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_machine_learning_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_medical_genetics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_moral_scenarios_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_world_religions_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_management_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_chemistry_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_computer_security_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_conceptual_physics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_human_sexuality_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_professional_law_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_philosophy_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_prehistory_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_sociology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_clinical_knowledge_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_mathematics_en-en.yaml │ │ │ ├── 
itabench_mmlu_cloze_high_school_biology_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_physics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_international_law_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_logical_fallacies_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_public_relations_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_security_studies_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_human_aging_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_miscellaneous_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_us_foreign_policy_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_econometrics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_global_facts_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_jurisprudence_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_chemistry_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_professional_medicine_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_formal_logic_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_electrical_engineering_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_elementary_mathematics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_mathematics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_statistics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_business_ethics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_biology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_physics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_moral_disputes_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_college_computer_science_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_geography_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_us_history_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_professional_accounting_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_abstract_algebra_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_medicine_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_machine_learning_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_medical_genetics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_moral_scenarios_en-en.yaml │ │ │ ├── 
itabench_mmlu_multichoice_world_religions_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_psychology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_chemistry_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_computer_security_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_conceptual_physics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_human_sexuality_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_law_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_world_history_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_professional_psychology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_clinical_knowledge_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_mathematics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_biology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_physics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_international_law_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_logical_fallacies_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_public_relations_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_security_studies_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_computer_science_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_us_foreign_policy_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_macroeconomics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_microeconomics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_chemistry_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_medicine_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_european_history_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_electrical_engineering_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_elementary_mathematics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_mathematics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_statistics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_college_computer_science_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_geography_en-en.yaml │ 
│ │ ├── itabench_mmlu_multichoice_high_school_us_history_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_accounting_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_psychology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_world_history_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_psychology_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_computer_science_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_macroeconomics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_microeconomics_en-en.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_government_and_politics_en-en.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_european_history_en-en.yaml │ │ │ └── itabench_mmlu_multichoice_high_school_government_and_politics_en-en.yaml │ │ ├── it-it │ │ │ ├── itabench_mmlu_cloze_anatomy_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_astronomy_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_marketing_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_nutrition_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_virology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_management_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_miscellaneous_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_philosophy_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_prehistory_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_sociology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_econometrics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_global_facts_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_formal_logic_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_human_aging_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_human_sexuality_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_jurisprudence_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_anatomy_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_business_ethics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_astronomy_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_marketing_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_nutrition_it-it.yaml │ │ │ ├── 
itabench_mmlu_multichoice_virology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_abstract_algebra_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_physics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_machine_learning_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_medical_genetics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_moral_disputes_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_moral_scenarios_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_management_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_biology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_medicine_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_physics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_world_religions_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_miscellaneous_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_philosophy_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_prehistory_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_sociology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_clinical_knowledge_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_chemistry_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_computer_security_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_conceptual_physics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_biology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_logical_fallacies_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_professional_law_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_chemistry_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_international_law_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_professional_accounting_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_public_relations_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_security_studies_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_econometrics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_global_facts_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_electrical_engineering_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_statistics_it-it.yaml │ │ │ ├── 
itabench_mmlu_cloze_professional_medicine_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_formal_logic_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_human_aging_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_human_sexuality_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_jurisprudence_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_elementary_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_geography_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_business_ethics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_psychology_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_us_foreign_policy_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_abstract_algebra_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_physics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_machine_learning_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_medical_genetics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_moral_disputes_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_moral_scenarios_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_college_computer_science_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_biology_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_medicine_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_physics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_world_religions_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_computer_science_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_us_history_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_world_history_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_professional_psychology_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_clinical_knowledge_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_chemistry_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_computer_security_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_conceptual_physics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_biology_it-it.yaml │ │ │ ├── 
itabench_mmlu_multichoice_logical_fallacies_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_law_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_macroeconomics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_microeconomics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_chemistry_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_international_law_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_accounting_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_public_relations_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_security_studies_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_european_history_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_electrical_engineering_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_statistics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_medicine_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_elementary_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_geography_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_mathematics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_psychology_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_us_foreign_policy_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_college_computer_science_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_computer_science_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_us_history_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_world_history_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_professional_psychology_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_macroeconomics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_microeconomics_it-it.yaml │ │ │ ├── itabench_mmlu_cloze_high_school_government_and_politics_it-it.yaml │ │ │ ├── itabench_mmlu_multichoice_high_school_european_history_it-it.yaml │ │ │ └── 
itabench_mmlu_multichoice_high_school_government_and_politics_it-it.yaml │ │ └── _mmlu_yaml │ ├── itabench.trans.en-en.yaml │ └── itabench.trans.it-it.yaml └── adaptations │ ├── prelearn │ ├── cloze │ │ ├── __pycache__ │ │ │ └── utils.cpython-310.pyc │ │ ├── utils.py │ │ ├── prelearn_geometry_cloze.yaml │ │ ├── prelearn_physics_cloze.yaml │ │ ├── prelearn_data_mining_cloze.yaml │ │ ├── prelearn_precalculus_cloze.yaml │ │ └── prelearn_cloze.yaml │ ├── mc │ │ ├── prelearn_physics_mc.yaml │ │ ├── prelearn_geometry_mc.yaml │ │ ├── prelearn_data_mining_mc.yaml │ │ ├── prelearn_precalculus_mc.yaml │ │ └── prelearn_mc.yaml │ ├── _prelearn_physics_yaml │ ├── _prelearn_geometry_yaml │ ├── _prelearn_precalculus_yaml │ └── _prelearn_data_mining_yaml │ ├── pretens │ ├── cloze │ │ └── pretens_cloze.yaml │ ├── mc │ │ └── pretens_mc.yaml │ └── _pretens_yaml │ ├── nermud │ ├── mc │ │ ├── nermud_mc.yaml │ │ ├── nermud_wn_mc.yaml │ │ └── nermud_adg_mc.yaml │ ├── cloze │ │ ├── nermud_cloze.yaml │ │ ├── nermud_wn_cloze.yaml │ │ └── nermud_adg_cloze.yaml │ └── _nermud_yaml │ ├── ami │ ├── mc │ │ ├── ami_mc.yaml │ │ ├── ami_synth_mc.yaml │ │ └── ami_behaviour_mc.yaml │ ├── cloze │ │ ├── ami_cloze.yaml │ │ ├── ami_synth_cloze.yaml │ │ └── ami_behaviour_cloze.yaml │ ├── _ami_behaviour_yaml │ └── _ami_synth_yaml │ ├── itabench.adapt.cloze.yaml │ ├── itabench.adapt.mc.yaml │ ├── wic │ ├── mc │ │ └── wic_mc.yaml │ ├── cloze │ │ └── wic_cloze.yaml │ └── _wic_yaml │ ├── ghigliottinai │ ├── cloze │ │ └── ghigliottinai_cloze.yaml │ ├── mc │ │ └── ghigliottinai_mc.yaml │ └── _ghigliottinai_yaml │ ├── quandho │ └── mc │ │ └── quandho.yaml │ └── discotex │ └── mc │ └── discotex.yaml ├── .gitignore ├── requirements.txt └── assets └── images ├── ITA-bench.jpg └── ita-bench-figures.001.png /tasks/leaderboard_it/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/tasks/leaderboard_it/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | *.pyc 3 | __pycache__/ 4 | *.pyo 5 | *.pyd -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/EleutherAI/lm-evaluation-harness.git@v0.4.8#egg=lm-eval -------------------------------------------------------------------------------- /assets/images/ITA-bench.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SapienzaNLP/ita-bench/HEAD/assets/images/ITA-bench.jpg -------------------------------------------------------------------------------- /assets/images/ita-bench-figures.001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SapienzaNLP/ita-bench/HEAD/assets/images/ita-bench-figures.001.png -------------------------------------------------------------------------------- /tasks/leaderboard_it/gpqa_it/gpqa_main_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: main 2 | include: _template_yaml 3 | task: leaderboard_gpqa_main_it 4 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/gpqa_it/gpqa_diamond_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: diamond 2 | include: _template_yaml 3 | task: leaderboard_gpqa_diamond_it 4 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/gpqa_it/gpqa_extended_zeroshot.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_name: extended 2 | include: _template_yaml 3 | task: leaderboard_gpqa_extended_it 4 | -------------------------------------------------------------------------------- /tasks/translations/piqa/en-x/piqa_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_piqa_yaml 2 | task: itabench_piqa_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/piqa/en-x/piqa_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_piqa_yaml 2 | task: itabench_piqa_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/piqa/it-x/piqa_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_piqa_yaml 2 | task: itabench_piqa_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/piqa/it-x/piqa_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_piqa_yaml 2 | task: itabench_piqa_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/translations/sciq/en-x/sciq_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/sciq/en-x/sciq_en-it.yaml: 
-------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/sciq/it-x/sciq_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/sciq/it-x/sciq_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/leaderboard_it/musr_it/musr_team_allocation.yaml: -------------------------------------------------------------------------------- 1 | include: "_template_yaml" 2 | task: leaderboard_musr_team_allocation_it 3 | test_split: team_allocation 4 | -------------------------------------------------------------------------------- /tasks/translations/boolq/en-x/boolq_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/boolq/en-x/boolq_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/boolq/it-x/boolq_it-en.yaml: 
-------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/boolq/it-x/boolq_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/leaderboard_it/musr_it/musr_murder_mysteries.yaml: -------------------------------------------------------------------------------- 1 | include: "_template_yaml" 2 | task: leaderboard_musr_murder_mysteries_it 3 | test_split: murder_mysteries 4 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/musr_it/musr_object_placements.yaml: -------------------------------------------------------------------------------- 1 | include: "_template_yaml" 2 | task: leaderboard_musr_object_placements_it 3 | test_split: object_placement 4 | -------------------------------------------------------------------------------- /tasks/translations/arc_easy/en-x/arc_easy_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_easy_yaml 2 | task: itabench_arc_easy_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/arc_easy/en-x/arc_easy_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_easy_yaml 2 | task: itabench_arc_easy_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- 
/tasks/translations/arc_easy/it-x/arc_easy_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_easy_yaml 2 | task: itabench_arc_easy_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/arc_easy/it-x/arc_easy_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_easy_yaml 2 | task: itabench_arc_easy_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/translations/hellaswag/en-x/hellaswag_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_hellaswag_yaml 2 | task: itabench_hellaswag_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/hellaswag/en-x/hellaswag_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_hellaswag_yaml 2 | task: itabench_hellaswag_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/hellaswag/it-x/hellaswag_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_hellaswag_yaml 2 | task: itabench_hellaswag_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/hellaswag/it-x/hellaswag_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_hellaswag_yaml 2 | task: itabench_hellaswag_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it 
-------------------------------------------------------------------------------- /tasks/translations/winogrande/en-x/winogrande_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_winogrande_yaml 2 | task: itabench_winogrande_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/winogrande/en-x/winogrande_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_winogrande_yaml 2 | task: itabench_winogrande_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/winogrande/it-x/winogrande_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_winogrande_yaml 2 | task: itabench_winogrande_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/winogrande/it-x/winogrande_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_winogrande_yaml 2 | task: itabench_winogrande_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SapienzaNLP/ita-bench/HEAD/tasks/adaptations/prelearn/cloze/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /tasks/translations/arc_challenge/en-x/arc_challenge_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: 
../_arc_challenge_yaml 2 | task: itabench_arc_challenge_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/arc_challenge/en-x/arc_challenge_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_challenge_yaml 2 | task: itabench_arc_challenge_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/arc_challenge/it-x/arc_challenge_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_challenge_yaml 2 | task: itabench_arc_challenge_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/arc_challenge/it-x/arc_challenge_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_arc_challenge_yaml 2 | task: itabench_arc_challenge_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/utils.py: -------------------------------------------------------------------------------- 1 | def doc_to_choice(doc): 2 | concept_A = doc["concept_A"] 3 | return [f"non è un prerequisito per {concept_A}", f"è un prerequisito per {concept_A}"] -------------------------------------------------------------------------------- /tasks/translations/gsm8k/en-x/gsm8k_multichoice_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_multichoice_yaml 2 | task: itabench_gsm8k_multichoice_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en 
-------------------------------------------------------------------------------- /tasks/translations/gsm8k/en-x/gsm8k_multichoice_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_multichoice_yaml 2 | task: itabench_gsm8k_multichoice_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/gsm8k/it-x/gsm8k_multichoice_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_multichoice_yaml 2 | task: itabench_gsm8k_multichoice_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/gsm8k/it-x/gsm8k_multichoice_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_multichoice_yaml 2 | task: itabench_gsm8k_multichoice_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/en-x/truthful_qa_mc1_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc1_yaml 2 | task: itabench_truthful_qa_mc1_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/en-x/truthful_qa_mc1_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc1_yaml 2 | task: itabench_truthful_qa_mc1_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/it-x/truthful_qa_mc1_it-en.yaml: 
-------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc1_yaml 2 | task: itabench_truthful_qa_mc1_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/it-x/truthful_qa_mc1_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc1_yaml 2 | task: itabench_truthful_qa_mc1_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it -------------------------------------------------------------------------------- /tasks/translations/boolq/en-x/boolq_with_passages_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_with_passages_en-en 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_en_en -------------------------------------------------------------------------------- /tasks/translations/boolq/en-x/boolq_with_passages_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_with_passages_en-it 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_en_it -------------------------------------------------------------------------------- /tasks/translations/boolq/it-x/boolq_with_passages_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_with_passages_it-en 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_it_en -------------------------------------------------------------------------------- /tasks/translations/boolq/it-x/boolq_with_passages_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_boolq_yaml 2 | task: itabench_boolq_with_passages_it-it 
3 | 4 | process_docs: !function ../utils.process_docs_with_passages_it_it -------------------------------------------------------------------------------- /tasks/translations/sciq/en-x/sciq_with_passages_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_with_passages_en-en 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_en_en -------------------------------------------------------------------------------- /tasks/translations/sciq/en-x/sciq_with_passages_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_with_passages_en-it 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_en_it -------------------------------------------------------------------------------- /tasks/translations/sciq/it-x/sciq_with_passages_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_with_passages_it-en 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_it_en -------------------------------------------------------------------------------- /tasks/translations/sciq/it-x/sciq_with_passages_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_sciq_yaml 2 | task: itabench_sciq_with_passages_it-it 3 | 4 | process_docs: !function ../utils.process_docs_with_passages_it_it -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/en-x/truthful_qa_mc2_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc2_yaml 2 | task: itabench_truthful_qa_mc2_en-en 3 | 4 | process_docs: !function ../utils.process_docs_en_en_mc2 -------------------------------------------------------------------------------- 
/tasks/translations/truthful_qa/en-x/truthful_qa_mc2_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc2_yaml 2 | task: itabench_truthful_qa_mc2_en-it 3 | 4 | process_docs: !function ../utils.process_docs_en_it_mc2 -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/it-x/truthful_qa_mc2_it-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc2_yaml 2 | task: itabench_truthful_qa_mc2_it-en 3 | 4 | process_docs: !function ../utils.process_docs_it_en_mc2 -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/it-x/truthful_qa_mc2_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_truthful_qa_mc2_yaml 2 | task: itabench_truthful_qa_mc2_it-it 3 | 4 | process_docs: !function ../utils.process_docs_it_it_mc2 -------------------------------------------------------------------------------- /tasks/translations/gsm8k/en-x/gsm8k_generate_en-en.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_generation_yaml 2 | task: itabench_gsm8k_generation_en-en 3 | 4 | process_docs: !function ../utils.process_docs_with_explanations_en_en -------------------------------------------------------------------------------- /tasks/translations/gsm8k/en-x/gsm8k_generate_en-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_generation_yaml 2 | task: itabench_gsm8k_generation_en-it 3 | 4 | process_docs: !function ../utils.process_docs_with_explanations_en_it -------------------------------------------------------------------------------- /tasks/translations/gsm8k/it-x/gsm8k_generate_it-en.yaml: 
-------------------------------------------------------------------------------- 1 | include: ../_gsm8k_generation_yaml 2 | task: itabench_gsm8k_generation_it-en 3 | 4 | process_docs: !function ../utils.process_docs_with_explanations_it_en -------------------------------------------------------------------------------- /tasks/translations/gsm8k/it-x/gsm8k_generate_it-it.yaml: -------------------------------------------------------------------------------- 1 | include: ../_gsm8k_generation_yaml 2 | task: itabench_gsm8k_generation_it-it 3 | 4 | process_docs: !function ../utils.process_docs_with_explanations_it_it -------------------------------------------------------------------------------- /tasks/adaptations/pretens/cloze/pretens_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_pretens_yaml 2 | 3 | task: itabench_pretens_cloze 4 | 5 | doc_to_text: "{{text}}\nLa frase precedente" 6 | doc_to_target: label 7 | doc_to_choice: ["non ha senso", "ha senso"] 8 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/mc/nermud_mc.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_nermud_mc 2 | task: 3 | - itabench_nermud_adg_mc 4 | - itabench_nermud_wn_mc 5 | aggregate_metric_list: 6 | - metric: acc 7 | aggregation: mean 8 | - metric: acc_norm 9 | aggregation: mean -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/prelearn_geometry_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_geometry_yaml 2 | 3 | task: itabench_prelearn_geometry_cloze 4 | doc_to_text: "{{concept_B}}" 5 | doc_to_target: target 6 | doc_to_choice: !function utils.doc_to_choice 7 | -------------------------------------------------------------------------------- 
/tasks/adaptations/prelearn/cloze/prelearn_physics_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_physics_yaml 2 | 3 | task: itabench_prelearn_physics_cloze 4 | doc_to_text: "{{concept_B}}" 5 | doc_to_target: target 6 | doc_to_choice: !function utils.doc_to_choice 7 | -------------------------------------------------------------------------------- /tasks/adaptations/pretens/mc/pretens_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_pretens_yaml 2 | 3 | task: itabench_pretens_mc 4 | 5 | doc_to_text: "{{text}}\nDomanda: La frase precedente ha senso? Rispondi sì o no:" 6 | doc_to_target: label 7 | doc_to_choice: ["no", "sì"] 8 | -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/prelearn_data_mining_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_data_mining_yaml 2 | 3 | task: itabench_prelearn_data_mining_cloze 4 | doc_to_text: "{{concept_B}}" 5 | doc_to_target: target 6 | doc_to_choice: !function utils.doc_to_choice 7 | -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/prelearn_precalculus_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_precalculus_yaml 2 | 3 | task: itabench_prelearn_precalculus_cloze 4 | doc_to_text: "{{concept_B}}" 5 | doc_to_target: target 6 | doc_to_choice: !function utils.doc_to_choice 7 | -------------------------------------------------------------------------------- /tasks/adaptations/ami/mc/ami_mc.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_ami_mc 2 | 3 | task: 4 | - itabench_ami_behaviour_mc 5 | - itabench_ami_synth_mc 6 | aggregate_metric_list: 7 | - metric: acc 8 | aggregation: 
mean 9 | - metric: acc_norm 10 | aggregation: mean -------------------------------------------------------------------------------- /tasks/adaptations/itabench.adapt.cloze.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_adapt_cloze 2 | 3 | task: 4 | - itabench_ami_cloze 5 | - itabench_ghigliottinai_cloze 6 | - itabench_nermud_cloze 7 | - itabench_prelearn_cloze 8 | - itabench_pretens_cloze 9 | - itabench_wic_cloze -------------------------------------------------------------------------------- /tasks/adaptations/nermud/cloze/nermud_cloze.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_nermud_cloze 2 | task: 3 | - itabench_nermud_adg_cloze 4 | - itabench_nermud_wn_cloze 5 | aggregate_metric_list: 6 | - metric: acc 7 | aggregation: mean 8 | - metric: acc_norm 9 | aggregation: mean -------------------------------------------------------------------------------- /tasks/adaptations/ami/cloze/ami_cloze.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_ami_cloze 2 | task: 3 | - itabench_ami_behaviour_cloze 4 | - itabench_ami_synth_cloze 5 | aggregate_metric_list: 6 | - metric: acc 7 | aggregation: mean 8 | - metric: acc_norm 9 | aggregation: mean 10 | -------------------------------------------------------------------------------- /tasks/adaptations/ami/mc/ami_synth_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ami_synth_yaml 2 | task: itabench_ami_synth_mc 3 | dataset_name: Synth 4 | doc_to_text: "Tweet: '{{text}}'\nDomanda: il tweet presenta caratteristiche misogine? 
Rispondi sì o no:" 5 | doc_to_target: label 6 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/adaptations/ami/cloze/ami_synth_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ami_synth_yaml 2 | task: itabench_ami_synth_cloze 3 | dataset_name: Synth 4 | doc_to_text: "Tweet: '{{text}}'\nIl tweet" 5 | doc_to_target: label 6 | doc_to_choice: ["non presenta caratteristiche misogine", "presenta caratteristiche misogine"] 7 | -------------------------------------------------------------------------------- /tasks/adaptations/itabench.adapt.mc.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_adapt_mc 2 | 3 | task: 4 | - itabench_ami_mc 5 | - itabench_discotex_mc 6 | - itabench_ghigliottinai_mc 7 | - itabench_nermud_mc 8 | - itabench_prelearn_mc 9 | - itabench_pretens_mc 10 | - itabench_quandho_mc 11 | - itabench_wic_mc -------------------------------------------------------------------------------- /tasks/adaptations/wic/mc/wic_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_wic_yaml 2 | 3 | task: itabench_wic_mc 4 | 5 | doc_to_text: "Frase 1: {{sentence1}}\nFrase 2: {{sentence2}}\nDomanda: La parola \"{{lemma}}\" ha lo stesso significato nelle due frasi precedenti? 
Rispondi sì o no:" 6 | doc_to_target: label 7 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/leaderboard_it/gpqa_it/_leaderboard_gpqa.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_leaderboard_gpqa_it 2 | task: 3 | - leaderboard_gpqa_diamond_it 4 | - leaderboard_gpqa_extended_it 5 | - leaderboard_gpqa_main_it 6 | aggregate_metric_list: 7 | - metric: acc_norm 8 | aggregation: mean 9 | weight_by_size: true 10 | -------------------------------------------------------------------------------- /tasks/adaptations/wic/cloze/wic_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_wic_yaml 2 | 3 | task: itabench_wic_cloze 4 | 5 | doc_to_text: "Frase 1: {{sentence1}}\nFrase 2: {{sentence2}}\nLa parola \"{{lemma}}\" ha" 6 | doc_to_target: label 7 | doc_to_choice: ["un significato differente tra le due frasi", "lo stesso significato in entrambe le frasi"] -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/mc/prelearn_physics_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_physics_yaml 2 | 3 | task: itabench_prelearn_physics_mc 4 | doc_to_text: "Domanda: il concetto \"{{concept_B}}\" è un prerequisito per la comprensione del concetto \"{{concept_A}}\"? Rispondi sì o no:" 5 | doc_to_target: target 6 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/mc/prelearn_geometry_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_geometry_yaml 2 | 3 | task: itabench_prelearn_geometry_mc 4 | doc_to_text: "Domanda: il concetto \"{{concept_B}}\" è un prerequisito per la comprensione del concetto \"{{concept_A}}\"? 
Rispondi sì o no:" 5 | doc_to_target: target 6 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/leaderboard_it/musr_it/_musr.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_leaderboard_musr_it 2 | task: 3 | - leaderboard_musr_murder_mysteries_it 4 | - leaderboard_musr_object_placements_it 5 | - leaderboard_musr_team_allocation_it 6 | aggregate_metric_list: 7 | - metric: acc_norm 8 | aggregation: mean 9 | weight_by_size: true 10 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/cloze/nermud_wn_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_nermud_yaml 2 | 3 | task: itabench_nermud_wn_cloze 4 | dataset_path: sapienzanlp/nermud 5 | dataset_name: WN 6 | 7 | doc_to_text: "Data la frase: \"{{text}}\"\nL'entità {{target_entity}} è" 8 | doc_to_target: label 9 | doc_to_choice: ["un luogo", "un'organizzazione", "una persona"] -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/mc/prelearn_data_mining_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_data_mining_yaml 2 | 3 | task: itabench_prelearn_data_mining_mc 4 | doc_to_text: "Domanda: il concetto \"{{concept_B}}\" è un prerequisito per la comprensione del concetto \"{{concept_A}}\"? 
Rispondi sì o no:" 5 | doc_to_target: target 6 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/mc/prelearn_precalculus_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_prelearn_precalculus_yaml 2 | 3 | task: itabench_prelearn_precalculus_mc 4 | doc_to_text: "Domanda: il concetto \"{{concept_B}}\" è un prerequisito per la comprensione del concetto \"{{concept_A}}\"? Rispondi sì o no:" 5 | doc_to_target: target 6 | doc_to_choice: ["no", "sì"] -------------------------------------------------------------------------------- /tasks/adaptations/ami/mc/ami_behaviour_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ami_behaviour_yaml 2 | task: itabench_ami_behaviour_mc 3 | dataset_name: Behaviour 4 | doc_to_text: "Tweet: '{{text}}'\nDomanda: che livello di misoginia è presente nel tweet?\nA. Nessuno\nB. Misogino\nC. 
Misogino aggressivo\nRisposta:" 5 | doc_to_target: label 6 | doc_to_choice: ["A", "B", "C"] 7 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/cloze/nermud_adg_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_nermud_yaml 2 | 3 | task: itabench_nermud_adg_cloze 4 | dataset_path: sapienzanlp/nermud 5 | dataset_name: ADG 6 | 7 | doc_to_text: "Data la frase: \"{{text}}\"\nL'entità {{target_entity}} è" 8 | doc_to_target: label 9 | doc_to_choice: ["un luogo", "un'organizzazione", "una persona"] 10 | -------------------------------------------------------------------------------- /tasks/adaptations/ami/cloze/ami_behaviour_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ami_behaviour_yaml 2 | task: itabench_ami_behaviour_cloze 3 | dataset_name: Behaviour 4 | doc_to_text: "Tweet: '{{text}}'\nIl tweet" 5 | doc_to_target: label 6 | doc_to_choice: ["non presenta caratteristiche misogine", "presenta caratteristiche misogine", "presenta caratteristiche misogine aggressive"] 7 | -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/mc/prelearn_mc.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_prelearn_mc 2 | 3 | task: 4 | - itabench_prelearn_data_mining_mc 5 | - itabench_prelearn_geometry_mc 6 | - itabench_prelearn_physics_mc 7 | - itabench_prelearn_precalculus_mc 8 | aggregate_metric_list: 9 | - metric: acc 10 | aggregation: mean 11 | - metric: acc_norm 12 | aggregation: mean -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_anatomy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about 
"anatomy". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_anatomy' 7 | tag: 8 | - itabench_mmlu_cloze_stem_en-en_tasks 9 | task: itabench_mmlu_cloze_anatomy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_anatomy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "anatomia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_anatomy' 7 | tag: 8 | - itabench_mmlu_cloze_stem_it-it_tasks 9 | task: itabench_mmlu_cloze_anatomy_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_virology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "virology". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_virology' 7 | tag: 8 | - itabench_mmlu_cloze_other_en-en_tasks 9 | task: itabench_mmlu_cloze_virology_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_astronomy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "astronomia". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_astronomy' 7 | tag: 8 | - itabench_mmlu_cloze_stem_it-it_tasks 9 | task: itabench_mmlu_cloze_astronomy_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_marketing_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "marketing". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_marketing' 7 | tag: 8 | - itabench_mmlu_cloze_other_it-it_tasks 9 | task: itabench_mmlu_cloze_marketing_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_nutrition_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "nutrizione". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_nutrition' 7 | tag: 8 | - itabench_mmlu_cloze_other_it-it_tasks 9 | task: itabench_mmlu_cloze_nutrition_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_virology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "virologia". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_virology' 7 | tag: 8 | - itabench_mmlu_cloze_other_it-it_tasks 9 | task: itabench_mmlu_cloze_virology_it-it 10 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/musr_it/_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/MUSR_italian 2 | output_type: multiple_choice 3 | doc_to_text: !function utils.doc_to_text 4 | doc_to_target: "{{answer_choice}}" 5 | doc_to_choice: "{{choice_translations}}" 6 | metric_list: 7 | - metric: acc_norm 8 | aggregation: mean 9 | higher_is_better: true 10 | metadata: 11 | version: 1.0 12 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_astronomy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "astronomy". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_astronomy' 7 | tag: 8 | - itabench_mmlu_cloze_stem_en-en_tasks 9 | task: itabench_mmlu_cloze_astronomy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_marketing_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "marketing". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_marketing' 7 | tag: 8 | - itabench_mmlu_cloze_other_en-en_tasks 9 | task: itabench_mmlu_cloze_marketing_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_nutrition_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "nutrition". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_nutrition' 7 | tag: 8 | - itabench_mmlu_cloze_other_en-en_tasks 9 | task: itabench_mmlu_cloze_nutrition_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_management_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "management". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_management' 7 | tag: 8 | - itabench_mmlu_cloze_other_it-it_tasks 9 | task: itabench_mmlu_cloze_management_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_management_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "management". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_management' 7 | tag: 8 | - itabench_mmlu_cloze_other_en-en_tasks 9 | task: itabench_mmlu_cloze_management_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_miscellaneous_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "varie". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_miscellaneous' 7 | tag: 8 | - itabench_mmlu_cloze_other_it-it_tasks 9 | task: itabench_mmlu_cloze_miscellaneous_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_philosophy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "filosofia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_philosophy' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_it-it_tasks 9 | task: itabench_mmlu_cloze_philosophy_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_prehistory_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "preistoria". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_prehistory' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_it-it_tasks 9 | task: itabench_mmlu_cloze_prehistory_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_sociology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "sociologia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_sociology' 7 | tag: 8 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 9 | task: itabench_mmlu_cloze_sociology_it-it 10 | -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/cloze/prelearn_cloze.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_prelearn_cloze 2 | task: 3 | - itabench_prelearn_data_mining_cloze 4 | - itabench_prelearn_geometry_cloze 5 | - itabench_prelearn_physics_cloze 6 | - itabench_prelearn_precalculus_cloze 7 | aggregate_metric_list: 8 | - metric: acc 9 | aggregation: mean 10 | - metric: acc_norm 11 | aggregation: mean 12 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_philosophy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "philosophy". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_philosophy' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_en-en_tasks 9 | task: itabench_mmlu_cloze_philosophy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_prehistory_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "prehistory". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_prehistory' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_en-en_tasks 9 | task: itabench_mmlu_cloze_prehistory_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_sociology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "sociology". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_sociology' 7 | tag: 8 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 9 | task: itabench_mmlu_cloze_sociology_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_human_aging_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "human 2 | aging". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_human_aging' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_human_aging_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_miscellaneous_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "miscellaneous". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_miscellaneous' 7 | tag: 8 | - itabench_mmlu_cloze_other_en-en_tasks 9 | task: itabench_mmlu_cloze_miscellaneous_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_econometrics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "econometria". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_econometrics' 7 | tag: 8 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 9 | task: itabench_mmlu_cloze_econometrics_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_global_facts_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fatti 2 | globali". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_global_facts' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_global_facts_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_econometrics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "econometrics". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_econometrics' 7 | tag: 8 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 9 | task: itabench_mmlu_cloze_econometrics_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_global_facts_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "global 2 | facts". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_global_facts' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_global_facts_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_jurisprudence_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "jurisprudence". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_jurisprudence' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_en-en_tasks 9 | task: itabench_mmlu_cloze_jurisprudence_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_anatomy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "anatomy". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_anatomy' 7 | tag: 8 | - itabench_mmlu_multichoice_stem_en-en_tasks 9 | task: itabench_mmlu_multichoice_anatomy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_formal_logic_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "logica 2 | formale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_formal_logic' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_formal_logic_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_human_aging_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "invecchiamento 2 | umano". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_human_aging' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_human_aging_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_human_sexuality_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: "Le seguenti sono domande a scelta multipla (con risposte) su \"sessualit\xE0\ 2 | \ umana\".\n\n" 3 | include: ../_mmlu_yaml 4 | process_docs: !function 'cloze_utils.process_docs_cloze_human_sexuality' 5 | tag: 6 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 7 | task: itabench_mmlu_cloze_human_sexuality_it-it 8 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_jurisprudence_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "giurisprudenza". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'cloze_utils.process_docs_cloze_jurisprudence' 7 | tag: 8 | - itabench_mmlu_cloze_humanities_it-it_tasks 9 | task: itabench_mmlu_cloze_jurisprudence_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_anatomy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "anatomia". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_anatomy' 7 | tag: 8 | - itabench_mmlu_multichoice_stem_it-it_tasks 9 | task: itabench_mmlu_multichoice_anatomy_it-it 10 | -------------------------------------------------------------------------------- /tasks/adaptations/ghigliottinai/cloze/ghigliottinai_cloze.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ghigliottinai_yaml 2 | 3 | task: itabench_ghigliottinai_cloze 4 | 5 | doc_to_text: "Date le parole: {{w1}}, {{w2}}, {{w3}}, {{w4}}, {{w5}}\nDomanda: Quale tra i seguenti concetti è quello che lega le parole date?\n{{choices[0]}}\n{{choices[1]}}\n{{choices[2]}}\n{{choices[3]}}\nRisposta:" 6 | doc_to_target: label 7 | doc_to_choice: choices -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_formal_logic_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "formal 2 | logic". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_formal_logic' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_formal_logic_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_virology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "virology". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_virology' 7 | tag: 8 | - itabench_mmlu_multichoice_other_en-en_tasks 9 | task: itabench_mmlu_multichoice_virology_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_business_ethics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "etica 2 | aziendale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_business_ethics' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_business_ethics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_astronomy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "astronomia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_astronomy' 7 | tag: 8 | - itabench_mmlu_multichoice_stem_it-it_tasks 9 | task: itabench_mmlu_multichoice_astronomy_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_marketing_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "marketing". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_marketing' 7 | tag: 8 | - itabench_mmlu_multichoice_other_it-it_tasks 9 | task: itabench_mmlu_multichoice_marketing_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_nutrition_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "nutrizione". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_nutrition' 7 | tag: 8 | - itabench_mmlu_multichoice_other_it-it_tasks 9 | task: itabench_mmlu_multichoice_nutrition_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_virology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "virologia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_virology' 7 | tag: 8 | - itabench_mmlu_multichoice_other_it-it_tasks 9 | task: itabench_mmlu_multichoice_virology_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_business_ethics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "business 2 | ethics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_business_ethics' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_business_ethics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_biology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | biology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_biology' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_college_biology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | physics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_college_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_moral_disputes_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "moral 2 | disputes". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_moral_disputes' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_moral_disputes_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_astronomy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "astronomy". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_astronomy' 7 | tag: 8 | - itabench_mmlu_multichoice_stem_en-en_tasks 9 | task: itabench_mmlu_multichoice_astronomy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_marketing_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "marketing". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_marketing' 7 | tag: 8 | - itabench_mmlu_multichoice_other_en-en_tasks 9 | task: itabench_mmlu_multichoice_marketing_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_nutrition_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "nutrition". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_nutrition' 7 | tag: 8 | - itabench_mmlu_multichoice_other_en-en_tasks 9 | task: itabench_mmlu_multichoice_nutrition_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_abstract_algebra_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "algebra 2 | astratta". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_abstract_algebra' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_abstract_algebra_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_college_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_machine_learning_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "machine 2 | learning". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_machine_learning' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_machine_learning_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_medical_genetics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "genetica 2 | medica". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_medical_genetics' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_medical_genetics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_moral_disputes_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "dispute 2 | morali". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_moral_disputes' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_moral_disputes_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_moral_scenarios_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "scenari 2 | morali". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_moral_scenarios' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_moral_scenarios_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_management_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "management". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_management' 7 | tag: 8 | - itabench_mmlu_multichoice_other_it-it_tasks 9 | task: itabench_mmlu_multichoice_management_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_abstract_algebra_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "abstract 2 | algebra". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_abstract_algebra' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_abstract_algebra_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_medicine_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | medicine". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_medicine' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_college_medicine_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_machine_learning_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "machine 2 | learning". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_machine_learning' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_machine_learning_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_medical_genetics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "medical 2 | genetics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_medical_genetics' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_medical_genetics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_moral_scenarios_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "moral 2 | scenarios". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_moral_scenarios' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_moral_scenarios_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_world_religions_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "world 2 | religions". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_world_religions' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_world_religions_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_management_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "management". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_management' 7 | tag: 8 | - itabench_mmlu_multichoice_other_en-en_tasks 9 | task: itabench_mmlu_multichoice_management_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_biology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "biologia 2 | universitaria". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_biology' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_college_biology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_medicine_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "medicina 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_medicine' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_college_medicine_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_world_religions_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "religioni 2 | mondiali". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_world_religions' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_world_religions_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_miscellaneous_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "varie". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_miscellaneous' 7 | tag: 8 | - itabench_mmlu_multichoice_other_it-it_tasks 9 | task: itabench_mmlu_multichoice_miscellaneous_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_philosophy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "filosofia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_philosophy' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_it-it_tasks 9 | task: itabench_mmlu_multichoice_philosophy_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_prehistory_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "preistoria". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_prehistory' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_it-it_tasks 9 | task: itabench_mmlu_multichoice_prehistory_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_sociology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "sociologia". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_sociology' 7 | tag: 8 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 9 | task: itabench_mmlu_multichoice_sociology_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_chemistry_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | chemistry". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_chemistry' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_college_chemistry_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_computer_security_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "computer 2 | security". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_computer_security' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_computer_security_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_conceptual_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "conceptual 2 | physics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_conceptual_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_conceptual_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_human_sexuality_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "human 2 | sexuality". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_human_sexuality' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_human_sexuality_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_professional_law_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | law". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_law' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_professional_law_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_philosophy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "philosophy". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_philosophy' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_en-en_tasks 9 | task: itabench_mmlu_multichoice_philosophy_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_prehistory_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "prehistory". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_prehistory' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_en-en_tasks 9 | task: itabench_mmlu_multichoice_prehistory_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_sociology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "sociology". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_sociology' 7 | tag: 8 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 9 | task: itabench_mmlu_multichoice_sociology_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_clinical_knowledge_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "conoscenza 2 | clinica". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_clinical_knowledge' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_clinical_knowledge_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_chemistry_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "chimica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_chemistry' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_college_chemistry_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_computer_security_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "sicurezza 2 | informatica". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_computer_security' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_computer_security_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_conceptual_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | concettuale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_conceptual_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_conceptual_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_biology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "biologia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_biology' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_biology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_logical_fallacies_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fallacie 2 | logiche". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_logical_fallacies' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_logical_fallacies_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_professional_law_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "diritto 2 | professionale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_law' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_professional_law_it-it 11 | -------------------------------------------------------------------------------- /tasks/adaptations/ghigliottinai/mc/ghigliottinai_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_ghigliottinai_yaml 2 | 3 | task: itabench_ghigliottinai_mc 4 | 5 | doc_to_text: "Date le parole: {{w1}}, {{w2}}, {{w3}}, {{w4}}, {{w5}}\nDomanda: Quale tra i seguenti concetti è quello che lega le parole date?\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nRisposta:" 6 | doc_to_target: label 7 | doc_to_choice: ["A", "B", "C", "D"] 8 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/mc/nermud_wn_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_nermud_yaml 2 | 3 | task: itabench_nermud_wn_mc 4 | dataset_path: sapienzanlp/nermud 5 | dataset_name: WN 6 | 7 | doc_to_text: "Data la frase: \"{{text}}\"\nDomanda: A quale tipologia di entità appartiene \"{{target_entity}}\" nella frase precedente?\nA. Luogo\nB. Organizzazione\nC. 
Persona\nRisposta:" 8 | doc_to_target: label 9 | doc_to_choice: ["A", "B", "C"] 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_clinical_knowledge_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "clinical 2 | knowledge". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_clinical_knowledge' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_clinical_knowledge_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | mathematics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_college_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_biology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school biology". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_biology' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_biology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school physics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_physics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_international_law_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "international 2 | law". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_international_law' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_international_law_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_logical_fallacies_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "logical 2 | fallacies". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_logical_fallacies' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_logical_fallacies_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_public_relations_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "public 2 | relations". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_public_relations' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_public_relations_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_security_studies_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "security 2 | studies". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_security_studies' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_security_studies_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_human_aging_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "human 2 | aging". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_human_aging' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_human_aging_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_miscellaneous_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "miscellaneous". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_miscellaneous' 7 | tag: 8 | - itabench_mmlu_multichoice_other_en-en_tasks 9 | task: itabench_mmlu_multichoice_miscellaneous_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_college_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_chemistry_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "chimica 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_chemistry' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_chemistry_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_international_law_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "diritto 2 | internazionale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_international_law' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_international_law_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_professional_accounting_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: "Le seguenti sono domande a scelta multipla (con risposte) su \"contabilit\xE0\ 2 | \ professionale\".\n\n" 3 | include: ../_mmlu_yaml 4 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_accounting' 5 | tag: 6 | - itabench_mmlu_cloze_other_it-it_tasks 7 | task: itabench_mmlu_cloze_professional_accounting_it-it 8 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_public_relations_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "relazioni 2 | pubbliche". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_public_relations' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_public_relations_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_security_studies_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "studi 2 | sulla sicurezza". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_security_studies' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_security_studies_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_econometrics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "econometria". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_econometrics' 7 | tag: 8 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 9 | task: itabench_mmlu_multichoice_econometrics_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_global_facts_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fatti 2 | globali". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_global_facts' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_global_facts_it-it 11 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/mc/nermud_adg_mc.yaml: -------------------------------------------------------------------------------- 1 | include: ../_nermud_yaml 2 | 3 | task: itabench_nermud_adg_mc 4 | dataset_path: sapienzanlp/nermud 5 | dataset_name: ADG 6 | 7 | doc_to_text: "Data la frase: \"{{text}}\"\nDomanda: A quale tipologia di entità appartiene \"{{target_entity}}\" nella frase precedente?\nA. Luogo\nB. Organizzazione\nC. Persona\nRisposta:" 8 | doc_to_target: label 9 | doc_to_choice: ["A", "B", "C"] 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_us_foreign_policy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "us 2 | foreign policy". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_us_foreign_policy' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_us_foreign_policy_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_econometrics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "econometrics". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_econometrics' 7 | tag: 8 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 9 | task: itabench_mmlu_multichoice_econometrics_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_global_facts_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "global 2 | facts". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_global_facts' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_global_facts_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_jurisprudence_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "jurisprudence". 2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_jurisprudence' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_en-en_tasks 9 | task: itabench_mmlu_multichoice_jurisprudence_en-en 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_electrical_engineering_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "ingegneria 2 | elettrica". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_electrical_engineering' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_electrical_engineering_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_statistics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "statistica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_statistics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_statistics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_professional_medicine_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "medicina 2 | professionale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_medicine' 8 | tag: 9 | - itabench_mmlu_cloze_other_it-it_tasks 10 | task: itabench_mmlu_cloze_professional_medicine_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_formal_logic_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "logica 2 | formale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_formal_logic' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_formal_logic_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_human_aging_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "invecchiamento 2 | umano". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_human_aging' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_human_aging_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_human_sexuality_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: "Le seguenti sono domande a scelta multipla (con risposte) su \"sessualit\xE0\ 2 | \ umana\".\n\n" 3 | include: ../_mmlu_yaml 4 | process_docs: !function 'multichoice_utils.process_docs_multichoice_human_sexuality' 5 | tag: 6 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 7 | task: itabench_mmlu_multichoice_human_sexuality_it-it 8 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_jurisprudence_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "giurisprudenza". 
2 | 3 | 4 | ' 5 | include: ../_mmlu_yaml 6 | process_docs: !function 'multichoice_utils.process_docs_multichoice_jurisprudence' 7 | tag: 8 | - itabench_mmlu_multichoice_humanities_it-it_tasks 9 | task: itabench_mmlu_multichoice_jurisprudence_it-it 10 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_chemistry_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school chemistry". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_chemistry' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_chemistry_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_professional_medicine_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | medicine". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_medicine' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_professional_medicine_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_formal_logic_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "formal 2 | logic". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_formal_logic' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_formal_logic_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_elementary_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | elementare". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_elementary_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_elementary_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_geography_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "geografia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_geography' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_geography_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_business_ethics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "etica 2 | aziendale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_business_ethics' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_business_ethics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_electrical_engineering_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "electrical 2 | engineering". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_electrical_engineering' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_electrical_engineering_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_elementary_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "elementary 2 | mathematics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_elementary_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_elementary_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school mathematics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_mathematics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_statistics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school statistics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_statistics' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_statistics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_business_ethics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "business 2 | ethics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_business_ethics' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_business_ethics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_biology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | biology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_biology' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_biology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | physics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_moral_disputes_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "moral 2 | disputes". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_moral_disputes' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_moral_disputes_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_psychology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "psicologia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_psychology' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_psychology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_us_foreign_policy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "politica 2 | estera degli Stati Uniti". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_us_foreign_policy' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_us_foreign_policy_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_abstract_algebra_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "algebra 2 | astratta". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_abstract_algebra' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_abstract_algebra_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_machine_learning_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "machine 2 | learning". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_machine_learning' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_machine_learning_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_medical_genetics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "genetica 2 | medica". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_medical_genetics' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_medical_genetics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_moral_disputes_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "dispute 2 | morali". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_moral_disputes' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_moral_disputes_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_moral_scenarios_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "scenari 2 | morali". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_moral_scenarios' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_moral_scenarios_it-it 11 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/_fewshot_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/BBH_italian 2 | output_type: multiple_choice 3 | test_split: test 4 | doc_to_text: 'D: {{input_translated}} 5 | 6 | R:' 7 | doc_to_target: "{{target_translated}}" 8 | metric_list: 9 | - metric: acc_norm 10 | aggregation: mean 11 | higher_is_better: true 12 | num_fewshot: 3 13 | fewshot_config: 14 | sampler: first_n 15 | metadata: 16 | version: 1.0 17 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_college_computer_science_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | computer science". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_computer_science' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_college_computer_science_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_geography_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school geography". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_geography' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_geography_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_us_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school us history". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_us_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_us_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_professional_accounting_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | accounting". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_accounting' 8 | tag: 9 | - itabench_mmlu_cloze_other_en-en_tasks 10 | task: itabench_mmlu_cloze_professional_accounting_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_abstract_algebra_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "abstract 2 | algebra". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_abstract_algebra' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_abstract_algebra_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_medicine_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | medicine". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_medicine' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_medicine_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_machine_learning_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "machine 2 | learning". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_machine_learning' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_machine_learning_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_medical_genetics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "medical 2 | genetics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_medical_genetics' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_medical_genetics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_moral_scenarios_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "moral 2 | scenarios". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_moral_scenarios' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_moral_scenarios_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_world_religions_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "world 2 | religions". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_world_religions' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_world_religions_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_college_computer_science_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "informatica 2 | universitaria". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_college_computer_science' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_college_computer_science_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_biology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "biologia 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_biology' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_biology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_medicine_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "medicina 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_medicine' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_medicine_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_world_religions_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "religioni 2 | mondiali". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_world_religions' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_world_religions_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_psychology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school psychology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_psychology' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_psychology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_chemistry_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | chemistry". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_chemistry' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_chemistry_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_computer_security_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "computer 2 | security". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_computer_security' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_computer_security_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_conceptual_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "conceptual 2 | physics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_conceptual_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_conceptual_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_human_sexuality_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "human 2 | sexuality". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_human_sexuality' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_human_sexuality_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_professional_law_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | law". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_law' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_professional_law_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_computer_science_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "informatica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_computer_science' 8 | tag: 9 | - itabench_mmlu_cloze_stem_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_computer_science_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_us_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | degli Stati Uniti liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_us_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_us_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_world_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | del mondo liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_world_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_world_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_professional_psychology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "psicologia 2 | professionale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_psychology' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_professional_psychology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_clinical_knowledge_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "conoscenza 2 | clinica". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_clinical_knowledge' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_clinical_knowledge_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_chemistry_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "chimica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_chemistry' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_chemistry_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_computer_security_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "sicurezza 2 | informatica". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_computer_security' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_computer_security_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_conceptual_physics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fisica 2 | concettuale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_conceptual_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_conceptual_physics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_biology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "biologia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_biology' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_biology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_logical_fallacies_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "fallacie 2 | logiche". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_logical_fallacies' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_logical_fallacies_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_professional_law_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "diritto 2 | professionale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_law' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_professional_law_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_world_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school world history". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_world_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_world_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_professional_psychology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | psychology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_professional_psychology' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_professional_psychology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_clinical_knowledge_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "clinical 2 | knowledge". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_clinical_knowledge' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_clinical_knowledge_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | mathematics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_biology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school biology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_biology' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_biology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_physics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school physics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_physics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_physics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_international_law_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "international 2 | law". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_international_law' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_international_law_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_logical_fallacies_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "logical 2 | fallacies". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_logical_fallacies' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_logical_fallacies_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_public_relations_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "public 2 | relations". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_public_relations' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_public_relations_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_security_studies_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "security 2 | studies". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_security_studies' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_security_studies_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_macroeconomics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "macroeconomia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_macroeconomics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_macroeconomics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_microeconomics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "microeconomia 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_microeconomics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_microeconomics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_chemistry_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "chimica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_chemistry' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_chemistry_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_international_law_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "diritto 2 | internazionale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_international_law' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_international_law_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_professional_accounting_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: "Le seguenti sono domande a scelta multipla (con risposte) su \"contabilit\xE0\ 2 | \ professionale\".\n\n" 3 | include: ../_mmlu_yaml 4 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_accounting' 5 | tag: 6 | - itabench_mmlu_multichoice_other_it-it_tasks 7 | task: itabench_mmlu_multichoice_professional_accounting_it-it 8 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_public_relations_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "relazioni 2 | pubbliche". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_public_relations' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_public_relations_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_security_studies_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "studi 2 | sulla sicurezza". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_security_studies' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_security_studies_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_computer_science_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school computer science". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_computer_science' 8 | tag: 9 | - itabench_mmlu_cloze_stem_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_computer_science_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_us_foreign_policy_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "us 2 | foreign policy". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_us_foreign_policy' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_us_foreign_policy_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_european_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | europea liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_european_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_european_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_electrical_engineering_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "ingegneria 2 | elettrica". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_electrical_engineering' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_electrical_engineering_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_statistics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "statistica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_statistics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_statistics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_professional_medicine_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "medicina 2 | professionale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_medicine' 8 | tag: 9 | - itabench_mmlu_multichoice_other_it-it_tasks 10 | task: itabench_mmlu_multichoice_professional_medicine_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_macroeconomics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school macroeconomics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_macroeconomics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_macroeconomics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_microeconomics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school microeconomics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_microeconomics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_microeconomics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_chemistry_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school chemistry". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_chemistry' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_chemistry_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_professional_medicine_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | medicine". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_medicine' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_professional_medicine_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_elementary_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | elementare". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_elementary_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_elementary_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_geography_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "geografia 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_geography' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_geography_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_mathematics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "matematica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_mathematics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_european_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school european history". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_european_history' 8 | tag: 9 | - itabench_mmlu_cloze_humanities_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_european_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_electrical_engineering_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "electrical 2 | engineering". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_electrical_engineering' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_electrical_engineering_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_elementary_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "elementary 2 | mathematics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_elementary_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_elementary_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_mathematics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school mathematics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_mathematics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_mathematics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_statistics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school statistics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_statistics' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_statistics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_psychology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "psicologia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_psychology' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_psychology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_us_foreign_policy_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "politica 2 | estera degli Stati Uniti". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_us_foreign_policy' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_us_foreign_policy_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_college_computer_science_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "college 2 | computer science". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_computer_science' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_college_computer_science_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_geography_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school geography". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_geography' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_geography_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_us_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school us history". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_us_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_us_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_professional_accounting_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | accounting". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_accounting' 8 | tag: 9 | - itabench_mmlu_multichoice_other_en-en_tasks 10 | task: itabench_mmlu_multichoice_professional_accounting_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_college_computer_science_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "informatica 2 | universitaria". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_college_computer_science' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_college_computer_science_it-it 11 | -------------------------------------------------------------------------------- /tasks/adaptations/pretens/_pretens_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/pretens 2 | output_type: multiple_choice 3 | 4 | description: "Indica se le seguenti frasi hanno senso.\n\n" 5 | 6 | training_split: train 7 | validation_split: train 8 | test_split: test 9 | 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | - metric: acc_norm 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_psychology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school psychology". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_psychology' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_psychology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_computer_science_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "informatica 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_computer_science' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_computer_science_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_us_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | degli Stati Uniti liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_us_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_us_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_world_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | del mondo liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_world_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_world_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_professional_psychology_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "psicologia 2 | professionale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_psychology' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_professional_psychology_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/_truthful_qa_mc1_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/truthful_qa_italian 2 | output_type: multiple_choice 3 | 4 | validation_split: validation 5 | 6 | doc_to_text: "query" 7 | doc_to_choice: "choices" 8 | doc_to_target: "gold" 9 | 10 | should_decontaminate: true 11 | doc_to_decontamination_query: "query" 12 | 13 | metric_list: 14 | - metric: acc 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 20 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_world_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school world history". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_world_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_world_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_professional_psychology_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "professional 2 | psychology". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_professional_psychology' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_professional_psychology_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_macroeconomics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "macroeconomia 2 | liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_macroeconomics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_macroeconomics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_microeconomics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "microeconomia 2 | liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_microeconomics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_microeconomics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_computer_science_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school computer science". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_computer_science' 8 | tag: 9 | - itabench_mmlu_multichoice_stem_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_computer_science_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_cloze_high_school_government_and_politics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "governo 2 | e politica liceale". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_government_and_politics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_it-it_tasks 10 | task: itabench_mmlu_cloze_high_school_government_and_politics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_european_history_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "storia 2 | europea liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_european_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_european_history_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_macroeconomics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school macroeconomics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_macroeconomics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_macroeconomics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_microeconomics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school microeconomics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_microeconomics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_microeconomics_en-en 11 | -------------------------------------------------------------------------------- /tasks/adaptations/ami/_ami_behaviour_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/ami 2 | output_type: multiple_choice 3 | 4 | description: "Indica il livello di misoginia presente nei seguenti tweet.\n\n" 5 | 6 | training_split: train 7 | validation_split: train 8 | test_split: test 9 | 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | - metric: acc_norm 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 -------------------------------------------------------------------------------- /tasks/adaptations/ami/_ami_synth_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/ami 2 | output_type: multiple_choice 3 | 4 | description: "Indica se i seguenti tweet presentano caratteristiche misogine.\n\n" 5 | 6 | training_split: train 7 | validation_split: train 8 | test_split: test 9 | 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | - metric: acc_norm 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_cloze_high_school_government_and_politics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school government and politics". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'cloze_utils.process_docs_cloze_high_school_government_and_politics' 8 | tag: 9 | - itabench_mmlu_cloze_social_sciences_en-en_tasks 10 | task: itabench_mmlu_cloze_high_school_government_and_politics_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_european_history_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school european history". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_european_history' 8 | tag: 9 | - itabench_mmlu_multichoice_humanities_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_european_history_en-en 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/it-it/itabench_mmlu_multichoice_high_school_government_and_politics_it-it.yaml: -------------------------------------------------------------------------------- 1 | description: 'Le seguenti sono domande a scelta multipla (con risposte) su "governo 2 | e politica liceale". 
3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_government_and_politics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_it-it_tasks 10 | task: itabench_mmlu_multichoice_high_school_government_and_politics_it-it 11 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/en-en/itabench_mmlu_multichoice_high_school_government_and_politics_en-en.yaml: -------------------------------------------------------------------------------- 1 | description: 'The following are multiple choice questions (with answers) about "high 2 | school government and politics". 3 | 4 | 5 | ' 6 | include: ../_mmlu_yaml 7 | process_docs: !function 'multichoice_utils.process_docs_multichoice_high_school_government_and_politics' 8 | tag: 9 | - itabench_mmlu_multichoice_social_sciences_en-en_tasks 10 | task: itabench_mmlu_multichoice_high_school_government_and_politics_en-en 11 | -------------------------------------------------------------------------------- /tasks/adaptations/nermud/_nermud_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/nermud 2 | output_type: multiple_choice 3 | 4 | description: "Data una frase e un'entità, indica se tale entità rappresenta un luogo, un'organizzazione o una persona.\n\n" 5 | 6 | training_split: train 7 | validation_split: train 8 | test_split: test 9 | 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | - metric: acc_norm 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 -------------------------------------------------------------------------------- /tasks/translations/truthful_qa/_truthful_qa_mc2_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/truthful_qa_italian 2 | output_type: multiple_choice 3 | 4 | 
validation_split: validation 5 | 6 | doc_to_text: "query" 7 | doc_to_choice: "choices" 8 | doc_to_target: "gold" 9 | 10 | process_results: !function utils.process_results_mc2 11 | 12 | should_decontaminate: true 13 | doc_to_decontamination_query: "query" 14 | 15 | metric_list: 16 | - metric: acc 17 | aggregation: mean 18 | higher_is_better: true 19 | 20 | metadata: 21 | version: 1.0 22 | -------------------------------------------------------------------------------- /tasks/adaptations/wic/_wic_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/wic 2 | output_type: multiple_choice 3 | 4 | description: "Date due frasi, che contengono un lemma in comune, indica se tale lemma ha lo stesso significato in entrambe le frasi.\n\n" 5 | 6 | training_split: validation 7 | validation_split: validation 8 | test_split: test 9 | 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | - metric: acc_norm 15 | aggregation: mean 16 | higher_is_better: true 17 | 18 | metadata: 19 | version: 1.0 -------------------------------------------------------------------------------- /tasks/leaderboard_it/mmlu_pro_it/mmlu_pro.yaml: -------------------------------------------------------------------------------- 1 | task: itabench_leaderboard_mmlu_pro_it 2 | 3 | dataset_path: li-lab/MMLU-ProX 4 | dataset_name: it 5 | 6 | test_split: test 7 | fewshot_split: validation 8 | fewshot_config: 9 | sampler: first_n 10 | output_type: multiple_choice 11 | doc_to_text: !function utils.doc_to_text 12 | doc_to_choice: !function utils.doc_to_choice 13 | doc_to_target: answer 14 | metric_list: 15 | - metric: acc 16 | aggregation: mean 17 | higher_is_better: true 18 | num_fewshot: 5 19 | metadata: 20 | version: 0.1 21 | -------------------------------------------------------------------------------- /tasks/translations/gsm8k/_gsm8k_multichoice_yaml: 
-------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/gsm8k_italian 2 | output_type: multiple_choice 3 | 4 | training_split: train 5 | fewshot_split: train 6 | test_split: test 7 | 8 | doc_to_text: "query" 9 | doc_to_choice: "choices" 10 | doc_to_target: "gold" 11 | 12 | should_decontaminate: true 13 | doc_to_decontamination_query: "query" 14 | 15 | metric_list: 16 | - metric: acc 17 | aggregation: mean 18 | higher_is_better: true 19 | - metric: acc_norm 20 | aggregation: mean 21 | higher_is_better: true 22 | 23 | metadata: 24 | version: 1.0 25 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/boolean_expressions.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: boolean_expressions 2 | description: "Valuta il risultato di un'espressione booleana casuale." 3 | doc_to_choice: ["Falso", "Vero"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: no ( ( no no Vero ) ) è 8 | target_translated: 'Falso' 9 | - input_translated: Vero e Falso e no Vero e Vero è 10 | target_translated: 'Falso' 11 | - input_translated: no no ( no ( Falso ) ) è 12 | target_translated: 'Vero' 13 | include: _fewshot_template_yaml 14 | task: leaderboard_bbh_boolean_expressions 15 | -------------------------------------------------------------------------------- /tasks/translations/mmlu/_mmlu_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/mmlu_italian 2 | output_type: multiple_choice 3 | 4 | validation_split: validation 5 | test_split: test 6 | fewshot_split: validation 7 | target_delimiter: " " 8 | 9 | doc_to_text: "query" 10 | doc_to_choice: "choices" 11 | doc_to_target: "gold" 12 | 13 | should_decontaminate: true 14 | doc_to_decontamination_query: "query" 15 | 16 | metric_list: 17 | - metric: acc 18 | aggregation: mean 19 | 
higher_is_better: true 20 | - metric: acc_norm 21 | aggregation: mean 22 | higher_is_better: true 23 | 24 | metadata: 25 | version: 1.0 26 | -------------------------------------------------------------------------------- /tasks/translations/piqa/_piqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/piqa_italian 2 | output_type: multiple_choice 3 | 4 | training_split: train 5 | validation_split: validation 6 | fewshot_split: train 7 | target_delimiter: "" 8 | 9 | doc_to_text: "query" 10 | doc_to_choice: "choices" 11 | doc_to_target: "gold" 12 | 13 | should_decontaminate: true 14 | doc_to_decontamination_query: "query" 15 | 16 | metric_list: 17 | - metric: acc 18 | aggregation: mean 19 | higher_is_better: true 20 | - metric: acc_norm 21 | aggregation: mean 22 | higher_is_better: true 23 | 24 | metadata: 25 | version: 1.0 26 | -------------------------------------------------------------------------------- /tasks/translations/sciq/_sciq_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/sciq_italian 2 | output_type: multiple_choice 3 | 4 | validation_split: validation 5 | test_split: test 6 | fewshot_split: validation 7 | target_delimiter: "" 8 | 9 | doc_to_text: "query" 10 | doc_to_choice: "choices" 11 | doc_to_target: "gold" 12 | 13 | should_decontaminate: true 14 | doc_to_decontamination_query: "query" 15 | 16 | metric_list: 17 | - metric: acc 18 | aggregation: mean 19 | higher_is_better: true 20 | - metric: acc_norm 21 | aggregation: mean 22 | higher_is_better: true 23 | 24 | metadata: 25 | version: 1.0 26 | -------------------------------------------------------------------------------- /tasks/translations/boolq/_boolq_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/boolq_italian 2 | output_type: multiple_choice 3 | 4 | training_split: train 5 | fewshot_split: 
train 6 | validation_split: validation 7 | target_delimiter: "" 8 | 9 | doc_to_text: "query" 10 | doc_to_choice: "choices" 11 | doc_to_target: "gold" 12 | 13 | should_decontaminate: true 14 | doc_to_decontamination_query: "query" 15 | 16 | metric_list: 17 | - metric: acc 18 | aggregation: mean 19 | higher_is_better: true 20 | - metric: acc_norm 21 | aggregation: mean 22 | higher_is_better: true 23 | 24 | metadata: 25 | version: 1.0 26 | -------------------------------------------------------------------------------- /tasks/translations/itabench.trans.en-en.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_trans_en-en 2 | 3 | task: 4 | - itabench_arc_challenge_en-en 5 | - itabench_arc_easy_en-en 6 | - itabench_boolq_en-en 7 | - itabench_boolq_with_passages_en-en 8 | - itabench_gsm8k_multichoice_en-en 9 | - itabench_hellaswag_en-en 10 | - itabench_mmlu_multichoice_en-en 11 | - itabench_mmlu_cloze_en-en 12 | - itabench_piqa_en-en 13 | - itabench_sciq_en-en 14 | - itabench_sciq_with_passages_en-en 15 | - itabench_truthful_qa_mc1_en-en 16 | - itabench_truthful_qa_mc2_en-en 17 | - itabench_winogrande_en-en 18 | 19 | metadata: 20 | version: 1.0 -------------------------------------------------------------------------------- /tasks/translations/itabench.trans.it-it.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_trans_it-it 2 | 3 | task: 4 | - itabench_arc_challenge_it-it 5 | - itabench_arc_easy_it-it 6 | - itabench_boolq_it-it 7 | - itabench_boolq_with_passages_it-it 8 | - itabench_gsm8k_multichoice_it-it 9 | - itabench_hellaswag_it-it 10 | - itabench_mmlu_multichoice_it-it 11 | - itabench_mmlu_cloze_it-it 12 | - itabench_piqa_it-it 13 | - itabench_sciq_it-it 14 | - itabench_sciq_with_passages_it-it 15 | - itabench_truthful_qa_mc1_it-it 16 | - itabench_truthful_qa_mc2_it-it 17 | - itabench_winogrande_it-it 18 | 19 | metadata: 20 | version: 1.0 
-------------------------------------------------------------------------------- /tasks/translations/winogrande/_winogrande_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/winogrande_italian 2 | output_type: multiple_choice 3 | 4 | training_split: train 5 | validation_split: validation 6 | fewshot_split: train 7 | target_delimiter: "" 8 | 9 | doc_to_text: "query" 10 | doc_to_choice: "choices" 11 | doc_to_target: "gold" 12 | 13 | should_decontaminate: true 14 | doc_to_decontamination_query: "query" 15 | 16 | metric_list: 17 | - metric: acc 18 | aggregation: mean 19 | higher_is_better: true 20 | - metric: acc_norm 21 | aggregation: mean 22 | higher_is_better: true 23 | 24 | metadata: 25 | version: 1.0 26 | -------------------------------------------------------------------------------- /tasks/translations/arc_easy/_arc_easy_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/arc_italian 2 | dataset_name: arc_easy 3 | output_type: multiple_choice 4 | 5 | training_split: train 6 | validation_split: validation 7 | test_split: test 8 | fewshot_split: validation 9 | 10 | doc_to_text: "query" 11 | doc_to_choice: "choices" 12 | doc_to_target: "gold" 13 | 14 | should_decontaminate: true 15 | doc_to_decontamination_query: "query" 16 | 17 | metric_list: 18 | - metric: acc 19 | aggregation: mean 20 | higher_is_better: true 21 | - metric: acc_norm 22 | aggregation: mean 23 | higher_is_better: true 24 | 25 | metadata: 26 | version: 1.0 27 | -------------------------------------------------------------------------------- /tasks/adaptations/ghigliottinai/_ghigliottinai_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/ghigliottinai 2 | output_type: multiple_choice 3 | 4 | description: "Ti viene chiesto di risolvere il gioco della ghigliottina.\nIl gioco della ghigliottina consiste nel 
def doc_to_choice(doc) -> list[str]:
    """Return the answer options of a document.

    Args:
        doc: Mapping that holds the options under ``"choice_translations"``.

    Returns:
        The value stored under ``doc["choice_translations"]``, unchanged.

    Raises:
        KeyError: If ``doc`` has no ``"choice_translations"`` entry.
    """
    return doc["choice_translations"]


# Prompt layout: narrative, blank line, question, blank line, the numbered
# options (one per line), then the answer cue the model has to complete.
DOC_TO_TEXT = "{narrative}\n\n{question}\n\n{choices}\nRisposta:"


def doc_to_text(doc) -> str:
    """Render a document as the prompt text.

    The options are numbered from 1 and written one per line as
    ``"<n> - <option>"``. Every option line, including the last one, ends
    with a newline, so the rendered prompt has an empty line between the
    final option and the ``"Risposta:"`` cue.

    Args:
        doc: Mapping with the entries ``"narrative_translation"``,
            ``"question_translation"`` and ``"choice_translations"``
            (an iterable of options).

    Returns:
        ``DOC_TO_TEXT`` filled in with the narrative, the question and the
        numbered options.

    Raises:
        KeyError: If one of the three entries is missing from ``doc``.
    """
    # Build the option block in a single pass instead of growing a string
    # with ``+=`` inside a loop.
    choices = "".join(
        f"{number} - {choice}\n"
        for number, choice in enumerate(doc["choice_translations"], start=1)
    )
    return DOC_TO_TEXT.format(
        narrative=doc["narrative_translation"],
        question=doc["question_translation"],
        choices=choices,
    )
| metadata: 28 | version: 1.0 29 | -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/_prelearn_physics_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/prelearn 2 | dataset_name: physics 3 | output_type: multiple_choice 4 | 5 | description: "Dati due concetti A e B, indica se il primo concetto è un prerequisito per il secondo.\nIl concetto A è prerequisito per il concetto B, se per comprendere B devi prima aver compreso A.\nI seguenti concetti appartengono al dominio: Fisica.\n\n" 6 | 7 | training_split: train 8 | validation_split: train 9 | test_split: test 10 | 11 | metric_list: 12 | - metric: acc 13 | aggregation: mean 14 | higher_is_better: true 15 | - metric: acc_norm 16 | aggregation: mean 17 | higher_is_better: true 18 | 19 | metadata: 20 | version: 1.0 -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/_prelearn_geometry_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/prelearn 2 | dataset_name: geometry 3 | output_type: multiple_choice 4 | 5 | description: "Dati due concetti A e B, indica se il primo concetto è un prerequisito per il secondo.\nIl concetto A è prerequisito per il concetto B, se per comprendere B devi prima aver compreso A.\nI seguenti concetti appartengono al dominio: Geometria.\n\n" 6 | 7 | training_split: train 8 | validation_split: train 9 | test_split: test 10 | 11 | metric_list: 12 | - metric: acc 13 | aggregation: mean 14 | higher_is_better: true 15 | - metric: acc_norm 16 | aggregation: mean 17 | higher_is_better: true 18 | 19 | metadata: 20 | version: 1.0 -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/_prelearn_precalculus_yaml: -------------------------------------------------------------------------------- 1 | 
dataset_path: sapienzanlp/prelearn 2 | dataset_name: precalculus 3 | output_type: multiple_choice 4 | 5 | description: "Dati due concetti A e B, indica se il primo concetto è un prerequisito per il secondo.\nIl concetto A è prerequisito per il concetto B, se per comprendere B devi prima aver compreso A.\nI seguenti concetti appartengono al dominio: Calcolo.\n\n" 6 | 7 | training_split: train 8 | validation_split: train 9 | test_split: test 10 | 11 | metric_list: 12 | - metric: acc 13 | aggregation: mean 14 | higher_is_better: true 15 | - metric: acc_norm 16 | aggregation: mean 17 | higher_is_better: true 18 | 19 | metadata: 20 | version: 1.0 -------------------------------------------------------------------------------- /tasks/adaptations/prelearn/_prelearn_data_mining_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/prelearn 2 | dataset_name: data_mining 3 | output_type: multiple_choice 4 | 5 | description: "Dati due concetti A e B, indica se il primo concetto è un prerequisito per il secondo.\nIl concetto A è prerequisito per il concetto B, se per comprendere B devi prima aver compreso A.\nI seguenti concetti appartengono al dominio: Data Mining.\n\n" 6 | 7 | training_split: train 8 | validation_split: train 9 | test_split: test 10 | 11 | metric_list: 12 | - metric: acc 13 | aggregation: mean 14 | higher_is_better: true 15 | - metric: acc_norm 16 | aggregation: mean 17 | higher_is_better: true 18 | 19 | metadata: 20 | version: 1.0 -------------------------------------------------------------------------------- /tasks/translations/arc_challenge/_arc_challenge_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/arc_italian 2 | dataset_name: arc_challenge 3 | output_type: multiple_choice 4 | 5 | training_split: train 6 | validation_split: validation 7 | test_split: test 8 | 9 | fewshot_split: validation 10 | fewshot_delimiter: "\n\n" 
11 | target_delimiter: " " 12 | 13 | doc_to_text: "query" 14 | doc_to_choice: "choices" 15 | doc_to_target: "gold" 16 | 17 | should_decontaminate: true 18 | doc_to_decontamination_query: "query" 19 | 20 | metric_list: 21 | - metric: acc 22 | aggregation: mean 23 | higher_is_better: true 24 | - metric: acc_norm 25 | aggregation: mean 26 | higher_is_better: true 27 | 28 | metadata: 29 | version: 1.0 30 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/gpqa_it/_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/gpqa_italian 2 | output_type: multiple_choice 3 | process_docs: !function utils.process_docs 4 | training_split: test 5 | # Because huggingface dataset only has train split 6 | validation_split: test 7 | test_split: null 8 | doc_to_text: "Qual è la risposta corretta a questa domanda:{{input_text_translation}}\nScelte:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nRisposta: " 9 | doc_to_target: answer 10 | doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] 11 | num_fewshot: 0 12 | metric_list: 13 | - metric: acc_norm 14 | aggregation: mean 15 | higher_is_better: true 16 | metadata: 17 | version: 1.0 18 | fewshot_config: 19 | sampler: first_n 20 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/sports_understanding.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: sports_understanding 2 | description: 'Determina se una frase costruita artificialmente relativa allo sport è plausibile o meno.' 3 | doc_to_choice: ["sì","no"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: È plausibile la seguente frase? "Bam Adebayo ha segnato un reverse layup nelle finali della Western Conference." 8 | target_translated: 'sì' 9 | - input_translated: È plausibile la seguente frase? 
"Santi Cazorla ha segnato un touchdown." 10 | target_translated: 'no' 11 | - input_translated: È plausibile la seguente frase? "DeMar DeRozan è stato chiamato per goaltending." 12 | target_translated: 'sì' 13 | include: _fewshot_template_yaml 14 | task: leaderboard_bbh_sports_understanding 15 | -------------------------------------------------------------------------------- /tasks/adaptations/quandho/mc/quandho.yaml: -------------------------------------------------------------------------------- 1 | task: itabench_quandho_mc 2 | dataset_path: sapienzanlp/quandho 3 | output_type: multiple_choice 4 | 5 | description: "Ti saranno poste domande di storia italiana.\nIdentifica quali paragrafi contengono la risposta alle domande date.\n\n" 6 | 7 | training_split: train 8 | validation_split: train 9 | test_split: test 10 | 11 | doc_to_text: "Data la domanda: \"{{question}}\"\nQuale tra i seguenti paragrafi risponde alla domanda?\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nRisposta:" 12 | doc_to_target: label 13 | doc_to_choice: ["A", "B", "C", "D"] 14 | 15 | metric_list: 16 | - metric: acc 17 | aggregation: mean 18 | higher_is_better: true 19 | - metric: acc_norm 20 | aggregation: mean 21 | higher_is_better: true 22 | 23 | metadata: 24 | version: 1.0 -------------------------------------------------------------------------------- /tasks/leaderboard_it/math_it/math_hard_it.yaml: -------------------------------------------------------------------------------- 1 | task: itabench_leaderboard_math_hard_it 2 | dataset_name: null 3 | 4 | dataset_path: sapienzanlp/MATH_hard_italian 5 | 6 | process_docs: !function utils.process_docs 7 | output_type: generate_until 8 | #training_split: train 9 | test_split: test 10 | doc_to_text: !function utils.doc_to_text 11 | process_results: !function utils.process_results 12 | doc_to_target: "{{answer if few_shot is undefined else solution}}" 13 | generation_kwargs: 14 | until: 15 | - "Problema:" 16 | 
do_sample: false 17 | temperature: 0 18 | max_gen_toks: 1024 19 | metric_list: 20 | - metric: exact_match 21 | aggregation: mean 22 | higher_is_better: true 23 | num_fewshot: 4 24 | metadata: 25 | version: 1.0 26 | #dataset_kwargs: 27 | # trust_remote_code: true 28 | fewshot_config: 29 | sampler: first_n 30 | samples: !function utils.list_fewshot_samples 31 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/leaderboard_it.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_leaderboard_it 2 | task: 3 | - itabench_leaderboard_mmlu_pro_it 4 | - itabench_leaderboard_bbh_it 5 | - itabench_leaderboard_gpqa_it 6 | - itabench_leaderboard_math_hard_it 7 | - itabench_leaderboard_ifeval_it 8 | - itabench_leaderboard_musr_it 9 | aggregate_metric_list: 10 | - metric: acc 11 | aggregation: mean 12 | weight_by_size: true 13 | - metric: acc_norm 14 | aggregation: mean 15 | weight_by_size: true 16 | - metric: exact_match 17 | aggregation: mean 18 | weight_by_size: true 19 | - metric: inst_level_loose_acc 20 | aggregation: mean 21 | weight_by_size: true 22 | - metric: inst_level_strict_acc 23 | aggregation: mean 24 | weight_by_size: true 25 | - metric: prompt_level_loose_acc 26 | aggregation: mean 27 | weight_by_size: true 28 | - metric: prompt_level_strict_acc 29 | aggregation: mean 30 | weight_by_size: true 31 | metadata: 32 | version: 1.0 33 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/object_counting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: object_counting 2 | description: "Domande che implicano l'enumerazione di oggetti e la richiesta al modello di contarli." 
import string

# Number of ``option_<i>`` fields probed per document (``option_0`` up to
# ``option_9``).
_MAX_OPTIONS = 10


def _lettered_options(doc):
    """Return ``(letter, option)`` pairs for the options present in ``doc``.

    Options are read from the keys ``option_0`` ... ``option_9`` and
    labelled ``A`` ... ``J`` by position. The scan stops at the first key
    that is missing from ``doc``. A key that is present but holds ``None``
    is skipped while later letters keep their position, so prompt letters
    and choice letters stay aligned.

    NOTE(review): skipping ``None`` assumes the dataset pads unused option
    columns with ``None`` -- confirm against the dataset schema.
    """
    pairs = []
    for index, letter in enumerate(string.ascii_uppercase[:_MAX_OPTIONS]):
        key = f"option_{index}"
        if key not in doc:
            break
        option = doc[key]
        if option is None:
            # Padding value, not a real option: never show "X. None".
            continue
        pairs.append((letter, option))
    return pairs


def doc_to_text(doc):
    """Render a document as a lettered multiple-choice prompt.

    Args:
        doc: Mapping with a ``"question"`` entry and up to ten
            ``option_<i>`` entries.

    Returns:
        The question on the first line, one ``"<letter>. <option>"`` line
        per option, and a closing ``"Answer:"`` cue.

    Raises:
        KeyError: If ``doc`` has no ``"question"`` entry.
    """
    parts = [f"{doc['question']}\n"]
    parts.extend(
        f"{letter}. {option}\n" for letter, option in _lettered_options(doc)
    )
    parts.append("Answer:")
    return "".join(parts)


def doc_to_choice(doc):
    """Return the answer letters for the options shown by ``doc_to_text``.

    Args:
        doc: Mapping with up to ten ``option_<i>`` entries.

    Returns:
        A list of uppercase letters, one per option present in ``doc``.
    """
    return [letter for letter, _ in _lettered_options(doc)]
3 | doc_to_choice: ["Sì","No"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Domanda: Fidel dice la verità. Jerry dice che Fidel dice la verità. Vina dice che Jerry dice la verità. Millicent dice che Vina mente. Raymond dice che Millicent mente. Raymond dice la verità?" 8 | target_translated: "Sì" 9 | - input_translated: "Domanda: Kristian mente. Millie dice che Kristian mente. Maybelle dice che Millie dice la verità. Fidel dice che Maybelle mente. Leda dice che Fidel mente. Leda dice la verità?" 10 | target_translated: "Sì" 11 | - input_translated: "Domanda: Kristian dice la verità. Michaela dice che Kristian mente. Raymond dice che Michaela dice la verità. Osvaldo dice che Raymond dice la verità. Jamey dice che Osvaldo dice la verità. Jamey dice la verità?" 12 | target_translated: "No" 13 | include: _fewshot_template_yaml 14 | task: leaderboard_bbh_web_of_lies 15 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/ifeval_it/ifeval.yaml: -------------------------------------------------------------------------------- 1 | task: itabench_leaderboard_ifeval_it 2 | 3 | ## DATASET ## 4 | dataset_name: null 5 | dataset_path: sapienzanlp/IFEval_italian 6 | test_split: train 7 | 8 | ## PROMPT & ANSWERING ## 9 | output_type: generate_until 10 | num_fewshot: 0 11 | 12 | #doc_to_text: translated_prompt 13 | doc_to_text: prompt 14 | 15 | doc_to_target: 0 16 | generation_kwargs: 17 | until: [] 18 | do_sample: false 19 | temperature: 0.0 20 | max_gen_toks: 1280 21 | process_results: !function utils.process_results 22 | fewshot_config: 23 | sampler: first_n 24 | 25 | ## METRICS ## 26 | metric_list: 27 | - metric: prompt_level_strict_acc 28 | aggregation: mean 29 | higher_is_better: true 30 | - metric: inst_level_strict_acc 31 | aggregation: !function utils.agg_inst_level_acc 32 | higher_is_better: true 33 | - metric: prompt_level_loose_acc 34 | aggregation: mean 35 | higher_is_better: true 36 | 
- metric: inst_level_loose_acc 37 | aggregation: !function utils.agg_inst_level_acc 38 | higher_is_better: true 39 | 40 | ## MISC ## 41 | metadata: 42 | version: 1.0 43 | 44 | -------------------------------------------------------------------------------- /tasks/translations/gsm8k/_gsm8k_generation_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: sapienzanlp/gsm8k_italian 2 | output_type: generate_until 3 | 4 | training_split: train 5 | fewshot_split: train 6 | test_split: test 7 | 8 | doc_to_text: "query" 9 | doc_to_target: "answer" 10 | 11 | metric_list: 12 | - metric: exact_match 13 | aggregation: mean 14 | higher_is_better: true 15 | ignore_case: true 16 | ignore_punctuation: false 17 | regexes_to_ignore: 18 | - "," 19 | - "\\$" 20 | - "(?s).*#### " 21 | - "\\.$" 22 | generation_kwargs: 23 | until: 24 | - "Question:" 25 | - "</s>" 26 | - "<|im_end|>" 27 | do_sample: false 28 | temperature: 0.0 29 | max_new_tokens: 512 30 | repeats: 1 31 | 32 | filter_list: 33 | - name: "strict-match" 34 | filter: 35 | - function: "regex" 36 | regex_pattern: "#### (\\-?[0-9\\.\\,]+)" 37 | - function: "take_first" 38 | - name: "flexible-extract" 39 | filter: 40 | - function: "regex" 41 | group_select: -1 42 | regex_pattern: "(-?[$0-9.,]{2,})|(-?[0-9]+)" 43 | - function: "take_first" 44 | 45 | metadata: 46 | version: 1.0 47 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/navigate.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: navigate 2 | description: 'Date una serie di istruzioni di navigazione, determinare se si tornerà al punto di partenza.' 3 | doc_to_choice: ["Sì","No"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Se segui queste istruzioni, ritorni al punto di partenza? 8 | Mantieni sempre la direzione frontale. Fai 2 passi a destra. Fai 9 passi a sinistra. 
Fai 7 passi a destra. 9 | 10 | Opzioni: 11 | 12 | - Sì 13 | 14 | - No" 15 | target_translated: 'Sì' 16 | - input_translated: "Se segui queste istruzioni, ritorni al punto di partenza? 17 | Gira a sinistra. Girati. Gira a sinistra. Fai 7 passi. Fai 2 passi. Fai 4 passi. 18 | Fai 8 passi. 19 | 20 | Opzioni: 21 | 22 | - Sì 23 | 24 | - No" 25 | target_translated: 'No' 26 | - input_translated: "Se segui queste istruzioni, ritorni al punto di partenza? 27 | Girati. Fai 1 passo. Fai 6 passi. Girati. Fai 6 passi. Fai 9 passi. 28 | Fai 1 passo. 29 | 30 | Opzioni: 31 | 32 | - Sì 33 | 34 | - No" 35 | target_translated: 'No' 36 | include: _fewshot_template_yaml 37 | task: leaderboard_bbh_navigate 38 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/_leaderboard_bbh.yaml: -------------------------------------------------------------------------------- 1 | group: itabench_leaderboard_bbh_it 2 | task: 3 | - leaderboard_bbh_boolean_expressions 4 | - leaderboard_bbh_causal_judgement 5 | - leaderboard_bbh_date_understanding 6 | - leaderboard_bbh_disambiguation_qa 7 | - leaderboard_bbh_formal_fallacies 8 | - leaderboard_bbh_geometric_shapes 9 | - leaderboard_bbh_logical_deduction_five_objects 10 | - leaderboard_bbh_logical_deduction_seven_objects 11 | - leaderboard_bbh_logical_deduction_three_objects 12 | - leaderboard_bbh_movie_recommendation 13 | - leaderboard_bbh_navigate 14 | - leaderboard_bbh_object_counting 15 | - leaderboard_bbh_penguins_in_a_table 16 | - leaderboard_bbh_reasoning_about_colored_objects 17 | - leaderboard_bbh_salient_translation_error_detection 18 | - leaderboard_bbh_snarks 19 | - leaderboard_bbh_sports_understanding 20 | - leaderboard_bbh_temporal_sequences 21 | - leaderboard_bbh_tracking_shuffled_objects_five_objects 22 | - leaderboard_bbh_tracking_shuffled_objects_seven_objects 23 | - leaderboard_bbh_tracking_shuffled_objects_three_objects 24 | - leaderboard_bbh_web_of_lies 25 | 
aggregate_metric_list: 26 | - metric: acc_norm 27 | aggregation: mean 28 | weight_by_size: true 29 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/date_understanding.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: date_understanding 2 | description: 'Dedurre la data dal contesto.' 3 | doc_to_choice: ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Oggi è la Vigilia di Natale del 1937. Qual è la data di 10 giorni fa in MM/DD/YYYY? 8 | 9 | Opzioni: 10 | 11 | (A) 12/14/2026 12 | (B) 12/14/1950 13 | (C) 12/14/2007 14 | (D) 12/14/1937 15 | (E) 07/14/1938 16 | (F) 12/14/1988" 17 | target_translated: (D) 18 | - input_translated: "Domani è l’11/12/2019. Qual è la data di un anno fa rispetto a oggi in MM/DD/YYYY? 19 | 20 | Opzioni: 21 | 22 | (A) 09/04/2018 23 | (B) 11/11/2018 24 | (C) 08/25/2018 25 | (D) 11/02/2018 26 | (E) 11/04/2018" 27 | target_translated: (B) 28 | - input_translated: "Jane e John si sono sposati il 2 gennaio 1958. Oggi è il loro quinto anniversario. Qual è la data di domani in MM/DD/YYYY? 29 | 30 | Opzioni: 31 | 32 | (A) 01/11/1961 33 | (B) 01/03/1963 34 | (C) 01/18/1961 35 | (D) 10/14/1960 36 | (E) 01/03/1982 37 | (F) 12/03/1960" 38 | target_translated: (B) 39 | include: _fewshot_template_yaml 40 | task: leaderboard_bbh_date_understanding 41 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/movie_recommendation.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: movie_recommendation 2 | description: "Consiglia film simili all'elenco di film fornito."
def preprocess(text):
    """Clean a raw text field before it is used in a prompt.

    Strips surrounding whitespace, turns the ``" [title]"`` marker into a
    sentence break, removes every remaining ``[...]`` span and collapses the
    double spaces those removals leave behind.

    Args:
        text: The string to clean, or ``None``.

    Returns:
        The cleaned string, or a single space when ``text`` is ``None``.
    """
    if text is None:
        return " "
    text = text.strip()
    text = text.replace(" [title]", ". ")
    text = re.sub(r"\[.*?\]", "", text)
    # Dropping a bracketed span leaves two adjacent spaces; squeeze them.
    text = text.replace("  ", " ")
    return text


def process_docs(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """Shuffle each document's answer options and record the correct letter.

    Every document must provide ``choice_translations`` (the first four
    entries are read after shuffling) and ``label`` (the index of the correct
    entry in that list).

    Args:
        dataset: Object exposing ``map``; ``map`` is called with the
            per-document transform.

    Returns:
        Whatever ``dataset.map`` returns. The transform yields the keys
        ``choice1`` .. ``choice4`` and ``answer`` (``"(A)"`` .. ``"(D)"``).
    """

    def _process_doc(doc):
        # Work on a copy: shuffling doc["choice_translations"] itself would
        # leave the original column out of sync with doc["label"].
        choices = list(doc["choice_translations"])
        correct_answer = choices[doc["label"]]

        # NOTE: uses the module-level RNG, so the option order depends on
        # whatever seed the caller has (or has not) set.
        random.shuffle(choices)
        correct_answer_index = choices.index(correct_answer)
        correct_answer_letter = f"({chr(ord('A') + correct_answer_index)})"

        return {
            "choice1": choices[0],
            "choice2": choices[1],
            "choice3": choices[2],
            "choice4": choices[3],
            "answer": correct_answer_letter,
        }

    return dataset.map(_process_doc)
18 | 19 | Frase: Il manager ha inviato un messaggio alla segretaria, ma lui non ha ancora risposto. 20 | 21 | Opzioni: 22 | 23 | (A) La segretaria non ha ancora risposto 24 | (B) Il manager non ha ancora risposto 25 | (C) Ambiguo" 26 | target_translated: (A) 27 | - input_translated: "Nelle seguenti frasi, spiega l’antecedente del pronome (a quale elemento si riferisce il pronome), oppure indica che è ambiguo. 28 | 29 | Frase: Bailey ha intenzione di incontrare il direttore nel suo ufficio. 30 | 31 | Opzioni: 32 | 33 | (A) Sarà l’ufficio di Bailey 34 | (B) Sarà l’ufficio del direttore 35 | (C) Ambiguo" 36 | target_translated: (C) 37 | include: _fewshot_template_yaml 38 | task: leaderboard_bbh_disambiguation_qa 39 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/snarks.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: snarks 2 | description: "Determina quale delle due frasi è sarcastica. 3 | 4 | Secondo il Cambridge University Dictionary, il sarcasmo è: 5 | 6 | l'uso di osservazioni che significano chiaramente l'opposto di ciò che dicono, fatte allo scopo di ferire i sentimenti di qualcuno 7 | o di criticare qualcosa in modo umoristico. 8 | 9 | Le frasi sarcastiche contengono spesso espressioni satiriche o ironiche, iperboli, osservazioni ambivalenti o spiritose." 10 | doc_to_choice: ["(A)","(B)"] 11 | fewshot_config: 12 | sampler: first_n 13 | samples: 14 | - input_translated: "Quale affermazione è sarcastica? 15 | 16 | Opzioni: 17 | 18 | (A) Sì, perché avere interessi e informarsi attivamente su di essi è un enorme spreco 19 | 20 | (B) Sì, perché avere interessi e informarsi attivamente su di essi è una cosa enorme" 21 | target_translated: (A) 22 | - input_translated: "Quale affermazione è sarcastica? 23 | 24 | Opzioni: 25 | 26 | (A) Nessuno sarà in disaccordo con te su questo.
Evitare gli attacchi ad hominem aiuta davvero la tua tesi 27 | 28 | (B) Nessuno sarà in disaccordo con te su questo. Gli attacchi ad hominem aiutano davvero la tua tesi" 29 | target_translated: (B) 30 | - input_translated: "Quale affermazione è sarcastica? 31 | 32 | Opzioni: 33 | 34 | (A) Coerenza nelle punizioni della lega? Cosa pensi che sia, politica? 35 | 36 | (B) Coerenza nelle punizioni della lega? Cosa pensi che sia, morale?" 37 | target_translated: (A) 38 | include: _fewshot_template_yaml 39 | task: leaderboard_bbh_snarks 40 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/geometric_shapes.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: geometric_shapes 2 | description: 'Assegna un nome alle forme geometriche partendo dai loro percorsi SVG.' 3 | doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)","(H)","(I)","(J)","(K)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "L'elemento SVG path 8 | disegna un 9 | 10 | Opzioni: 11 | 12 | (A) cerchio 13 | (B) eptagono 14 | (C) esagono 15 | (D) aquilone 16 | (E) linea 17 | (F) ottagono 18 | (G) pentagono 19 | (H) rettangolo 20 | (I) settore 21 | (J) triangolo" 22 | target_translated: (F) 23 | - input_translated: "L'elemento SVG path disegna un 24 | 25 | Opzioni: 26 | 27 | (A) cerchio 28 | (B) eptagono 29 | (C) esagono 30 | (D) aquilone 31 | (E) linea 32 | (F) ottagono 33 | (G) pentagono 34 | (H) rettangolo 35 | (I) settore 36 | (J) triangolo" 37 | target_translated: (G) 38 | - input_translated: "L'elemento SVG path disegna un 39 | 40 | Opzioni: 41 | 42 | (A) cerchio 43 | (B) eptagono 44 | (C) esagono 45 | (D) aquilone 46 | (E) linea 47 | (F) ottagono 48 | (G) pentagono 49 | (H) rettangolo 50 | (I) settore 51 | (J) triangolo" 52 | target_translated: (D) 53 | include: _fewshot_template_yaml 54 | task: leaderboard_bbh_geometric_shapes 55 | 
-------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/logical_deduction_three_objects.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: logical_deduction_three_objects 2 | description: "Risolvi un compito di deduzione logica che richiede di dedurre l'ordine di una sequenza di oggetti." 3 | doc_to_choice: ["(A)","(B)","(C)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 8 | In un torneo di golf c’erano tre golfisti: Amy, Eli ed Eve. Eve è arrivata davanti ad Amy. Eli è arrivato dietro ad Amy. 9 | 10 | Opzioni: 11 | 12 | (A) Amy è arrivata ultima 13 | (B) Eli è arrivato ultimo 14 | (C) Eve è arrivata ultima" 15 | target_translated: (B) 16 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 17 | Su uno scaffale ci sono tre libri: un libro bianco, un libro verde e un libro arancione. Il libro verde è a destra del libro bianco. Il libro arancione è quello più a destra. 18 | 19 | Opzioni: 20 | 21 | (A) Il libro bianco è quello più a sinistra 22 | (B) Il libro verde è quello più a sinistra 23 | (C) Il libro arancione è quello più a sinistra" 24 | target_translated: (A) 25 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 26 | Su uno scaffale ci sono tre libri: un libro rosso, un libro grigio e un libro bianco. Il libro bianco è alla sinistra del libro grigio. Il libro rosso è il secondo da sinistra. 
27 | 28 | Opzioni: 29 | 30 | (A) Il libro rosso è quello più a sinistra 31 | (B) Il libro grigio è quello più a sinistra 32 | (C) Il libro bianco è quello più a sinistra" 33 | target_translated: (C) 34 | include: _fewshot_template_yaml 35 | task: leaderboard_bbh_logical_deduction_three_objects 36 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/logical_deduction_five_objects.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: logical_deduction_five_objects 2 | description: "Risolvi un compito di deduzione logica che richiede di dedurre l'ordine di una sequenza di oggetti." 3 | doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 8 | In un torneo di golf c’erano tre golfisti: Amy, Eli ed Eve. Eve è arrivata davanti ad Amy. Eli è arrivato dietro ad Amy. 9 | 10 | Opzioni: 11 | 12 | (A) Amy è arrivata ultima 13 | (B) Eli è arrivato ultimo 14 | (C) Eve è arrivata ultima" 15 | target_translated: (B) 16 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 17 | Su uno scaffale ci sono tre libri: un libro bianco, un libro verde e un libro arancione. Il libro verde è a destra del libro bianco. Il libro arancione è quello più a destra. 18 | 19 | Opzioni: 20 | 21 | (A) Il libro bianco è quello più a sinistra 22 | (B) Il libro verde è quello più a sinistra 23 | (C) Il libro arancione è quello più a sinistra" 24 | target_translated: (A) 25 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. 
Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 26 | Su uno scaffale ci sono tre libri: un libro rosso, un libro grigio e un libro bianco. Il libro bianco è alla sinistra del libro grigio. Il libro rosso è il secondo da sinistra. 27 | 28 | Opzioni: 29 | 30 | (A) Il libro rosso è quello più a sinistra 31 | (B) Il libro grigio è quello più a sinistra 32 | (C) Il libro bianco è quello più a sinistra" 33 | target_translated: (C) 34 | include: _fewshot_template_yaml 35 | task: leaderboard_bbh_logical_deduction_five_objects 36 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/logical_deduction_seven_objects.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: logical_deduction_seven_objects 2 | description: "Risolvi un compito di deduzione logica che richiede di dedurre l'ordine di una sequenza di oggetti." 3 | doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 8 | In un torneo di golf c’erano tre golfisti: Amy, Eli ed Eve. Eve è arrivata davanti ad Amy. Eli è arrivato dietro ad Amy. 9 | 10 | Opzioni: 11 | 12 | (A) Amy è arrivata ultima 13 | (B) Eli è arrivato ultimo 14 | (C) Eve è arrivata ultima" 15 | target_translated: (B) 16 | - input_translated: "I seguenti paragrafi descrivono ciascuno un insieme di tre oggetti disposti in un ordine fisso. Le affermazioni sono logicamente coerenti all’interno di ogni paragrafo. 17 | Su uno scaffale ci sono tre libri: un libro bianco, un libro verde e un libro arancione. Il libro verde è a destra del libro bianco. Il libro arancione è quello più a destra. 
def preprocess(text):
    """Clean a HellaSwag context or ending string.

    Strips surrounding whitespace, removes every ``[...]`` span (keeping a
    ``". "`` that immediately precedes it, so the sentence break survives)
    and collapses the double spaces the removal leaves behind.
    Slightly changed from the code in lm_eval/tasks/hellaswag/utils.py.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    text = text.strip()
    # Group 1 is the optional ". " before the bracket; when it did not match,
    # the backreference expands to the empty string.
    text = re.sub(r"(\. )?\[.*?\]", r"\1", text)
    # Removing a bracketed span leaves two adjacent spaces; squeeze them.
    text = text.replace("  ", " ")
    return text
# Prompt templates keyed by source-language code; "{input}" is filled with the
# part of the sentence that precedes the blank.
QUERY_PREFIX = {
    "en": "{input}",
    "it": "{input}",
}

# Text appended after the query, keyed by target-language code.
ANSWER_PREFIX = {
    "en": "",
    "it": "",
}


def process_docs(
    dataset: "datasets.Dataset",
    source_language: str,
    target_language: str,
) -> "datasets.Dataset":
    """Build the prompt and candidate continuations for each document.

    The sentence is split at its first ``"_"``: the part before it becomes
    the query, and the part after it is appended to every choice.

    Args:
        dataset: Object exposing ``map`` and, on the mapped result, ``filter``.
        source_language: ``"en"`` reads ``doc["input"]``; any other value
            reads ``doc["input_translation"]``. Must be a key of
            ``QUERY_PREFIX``.
        target_language: ``"en"`` reads ``doc["choices"]``; any other value
            reads ``doc["choices_translation"]``. Must be a key of
            ``ANSWER_PREFIX``.

    Returns:
        The mapped dataset with documents whose query is blank filtered out.
        Each document carries ``id``, ``query``, ``choices`` and ``gold``.

    Raises:
        ValueError: If a sentence contains no ``"_"`` placeholder.
    """

    def _process_doc(doc):
        sentence = doc["input"] if source_language == "en" else doc["input_translation"]
        options = doc["choices"] if target_language == "en" else doc["choices_translation"]

        # Split on the first underscore to get the text that follows the blank.
        context, blank, continuation = sentence.partition("_")
        if not blank:
            raise ValueError(
                f"winogrande input has no '_' placeholder: {sentence!r}"
            )
        context = context.strip() + " "
        continuation = continuation.strip()

        query = QUERY_PREFIX[source_language].format(input=context)
        query += ANSWER_PREFIX[target_language]

        # TODO: This may not be the best for languages without spaces.
        options = [f"{option.strip()} {continuation}" for option in options]

        return {
            "id": doc["id"],
            "query": query,
            "choices": options,
            "gold": int(doc["label"]),
        }

    # A sentence that starts with the blank yields an empty query; drop it.
    return dataset.map(_process_doc).filter(lambda x: x["query"].strip() != "")


def process_docs_it_it(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """Italian sentence with Italian choices."""
    return process_docs(dataset, "it", "it")


def process_docs_it_en(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """Italian sentence with English choices."""
    return process_docs(dataset, "it", "en")


def process_docs_en_en(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """English sentence with English choices."""
    return process_docs(dataset, "en", "en")


def process_docs_en_it(dataset: "datasets.Dataset") -> "datasets.Dataset":
    """English sentence with Italian choices."""
    return process_docs(dataset, "en", "it")
3 | doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Ecco una tabella in cui la prima riga è un'intestazione e ogni riga successiva è un pinguino: 8 | nome, età, altezza (cm), peso (kg) 9 | Louis, 7, 50, 11 10 | Bernard, 5, 80, 13 11 | Vincent, 9, 60, 11 12 | Gwen, 8, 70, 15 13 | 14 | Per esempio: l'età di Louis è 7, il peso di Gwen è 15 kg, l’altezza di Bernard è 80 cm. 15 | 16 | Ora aggiungiamo un pinguino alla tabella: 17 | 18 | James, 12, 90, 12 19 | 20 | Quanti pinguini hanno meno di 8 anni? 21 | 22 | Opzioni: 23 | 24 | (A) 1 25 | (B) 2 26 | (C) 3 27 | (D) 4 28 | (E) 5" 29 | target_translated: (B) 30 | - input_translated: "Ecco una tabella in cui la prima riga è un'intestazione e ogni riga successiva è un pinguino: 31 | nome, età, altezza (cm), peso (kg) 32 | Louis, 7, 50, 11 33 | Bernard, 5, 80, 13 34 | Vincent, 9, 60, 11 35 | Gwen, 8, 70, 15 36 | 37 | Per esempio: l'età di Louis è 7, il peso di Gwen è 15 kg, l’altezza di Bernard è 80 cm. 38 | 39 | Qual è il pinguino più giovane? 40 | 41 | Opzioni: 42 | 43 | (A) Louis 44 | (B) Bernard 45 | (C) Vincent 46 | (D) Gwen 47 | (E) James" 48 | target_translated: (B) 49 | - input_translated: "Ecco una tabella in cui la prima riga è un'intestazione e ogni riga successiva è un pinguino: 50 | nome, età, altezza (cm), peso (kg) 51 | Louis, 7, 50, 11 52 | Bernard, 5, 80, 13 53 | Vincent, 9, 60, 11 54 | Gwen, 8, 70, 15 55 | 56 | Per esempio: l'età di Louis è 7, il peso di Gwen è 15 kg, l’altezza di Bernard è 80 cm. 57 | 58 | Qual è il nome del secondo pinguino in ordine alfabetico? 
59 | 60 | Opzioni: 61 | 62 | (A) Louis 63 | (B) Bernard 64 | (C) Vincent 65 | (D) Gwen 66 | (E) James" 67 | target_translated: (D) 68 | include: _fewshot_template_yaml 69 | task: leaderboard_bbh_penguins_in_a_table 70 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/tracking_shuffled_objects_three_objects.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: tracking_shuffled_objects_three_objects 2 | description: 'Un compito che richiede di determinare le posizioni finali di un insieme di oggetti date le loro posizioni iniziali e una descrizione di una sequenza di scambi.' 3 | doc_to_choice: ["(A)","(B)","(C)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Alice, Bob e Claire stanno giocando a un gioco. All’inizio del gioco, ognuno di loro ha una palla: Alice ha una palla gialla, Bob ha una palla blu e Claire ha una palla rosa. 8 | 9 | Man mano che il gioco procede, le coppie di giocatori si scambiano le palle. Per prima cosa, Claire e Alice si scambiano le palle. Poi, Alice e Bob si scambiano le palle. Infine, Claire e Bob si scambiano le palle. Alla fine del gioco, Bob ha la 10 | 11 | Opzioni: 12 | 13 | (A) palla gialla 14 | (B) palla blu 15 | (C) palla rosa" 16 | target_translated: (A) 17 | - input_translated: "Alice, Bob e Claire stanno giocando a un gioco. All’inizio del gioco, ognuno di loro ha una palla: Alice ha una palla bianca, Bob ha una palla viola e Claire ha una palla rosa. 18 | 19 | Man mano che il gioco procede, le coppie di giocatori si scambiano le palle. Per prima cosa, Bob e Alice si scambiano le palle. Poi, Bob e Claire si scambiano le palle. Infine, Bob e Alice si scambiano di nuovo le palle. 
Alla fine del gioco, Alice ha la 20 | 21 | Opzioni: 22 | 23 | (A) palla bianca 24 | (B) palla viola 25 | (C) palla rosa" 26 | target_translated: (C) 27 | - input_translated: "Alice, Bob e Claire sono ballerini in una square dance. All’inizio di una canzone, ognuno di loro ha un partner: Alice balla con Lola, Bob balla con Rodrigo e Claire balla con Patrick. 28 | 29 | Durante la canzone, i ballerini cambiano spesso partner. Per prima cosa, Alice e Bob si scambiano i partner. Poi, Claire e Bob si scambiano i partner. Infine, Bob e Alice si scambiano di nuovo i partner. Alla fine del ballo, Alice sta ballando con 30 | 31 | Opzioni: 32 | 33 | (A) Lola 34 | (B) Rodrigo 35 | (C) Patrick" 36 | target_translated: (C) 37 | include: _fewshot_template_yaml 38 | task: leaderboard_bbh_tracking_shuffled_objects_three_objects 39 | -------------------------------------------------------------------------------- /tasks/leaderboard_it/bbh_it/tracking_shuffled_objects_five_objects.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: tracking_shuffled_objects_five_objects 2 | description: 'Un compito che richiede di determinare le posizioni finali di un insieme di oggetti date le loro posizioni iniziali e una descrizione di una sequenza di scambi.' 3 | doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"] 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - input_translated: "Alice, Bob e Claire stanno giocando a un gioco. All’inizio del gioco, ognuno di loro ha una palla: Alice ha una palla gialla, Bob ha una palla blu e Claire ha una palla rosa. 8 | 9 | Man mano che il gioco procede, le coppie di giocatori si scambiano le palle. Per prima cosa, Claire e Alice si scambiano le palle. Poi, Alice e Bob si scambiano le palle. Infine, Claire e Bob si scambiano le palle. 
Alla fine del gioco, Bob ha la 10 | 11 | Opzioni: 12 | 13 | (A) palla gialla 14 | (B) palla blu 15 | (C) palla rosa" 16 | target_translated: (A) 17 | - input_translated: "Alice, Bob e Claire stanno giocando a un gioco. All’inizio del gioco, ognuno di loro ha una palla: Alice ha una palla bianca, Bob ha una palla viola e Claire ha una palla rosa. 18 | 19 | Man mano che il gioco procede, le coppie di giocatori si scambiano le palle. Per prima cosa, Bob e Alice si scambiano le palle. Poi, Bob e Claire si scambiano le palle. Infine, Bob e Alice si scambiano di nuovo le palle. Alla fine del gioco, Alice ha la 20 | 21 | Opzioni: 22 | 23 | (A) palla bianca 24 | (B) palla viola 25 | (C) palla rosa" 26 | target_translated: (C) 27 | - input_translated: "Alice, Bob e Claire sono ballerini in una square dance. All’inizio di una canzone, ognuno di loro ha un partner: Alice balla con Lola, Bob balla con Rodrigo e Claire balla con Patrick. 28 | 29 | Durante la canzone, i ballerini cambiano spesso partner. Per prima cosa, Alice e Bob si scambiano i partner. Poi, Claire e Bob si scambiano i partner. Infine, Bob e Alice si scambiano di nuovo i partner. Alla fine del ballo, Alice sta ballando con 30 | 31 | Opzioni: 32 | 33 | (A) Lola 34 | (B) Rodrigo 35 | (C) Patrick" 36 | target_translated: (C) 37 | include: _fewshot_template_yaml 38 | task: leaderboard_bbh_tracking_shuffled_objects_five_objects 39 | --------------------------------------------------------------------------------