├── LICENSE
├── README.md
├── app.py
├── demo_batch_completion.py
├── demo_completion.py
├── demo_multiturn_chat.py
├── demo_token_control.py
├── eval
├── cd_metric.py
├── data
│   ├── cd3_test.jsonl
│   ├── cd4_test.jsonl
│   ├── cd5_test.jsonl
│   ├── sudoku_4x4_10.jsonl
│   ├── sudoku_4x4_11.jsonl
│   ├── sudoku_4x4_12.jsonl
│   ├── sudoku_4x4_4.jsonl
│   ├── sudoku_4x4_5.jsonl
│   ├── sudoku_4x4_6.jsonl
│   ├── sudoku_4x4_7.jsonl
│   ├── sudoku_4x4_8.jsonl
│   ├── sudoku_4x4_9.jsonl
│   └── trip_planning.json
├── eval.py
├── eval_dream_gen.sh
├── eval_dream_gen_planning.sh
├── eval_dream_mc.sh
├── eval_planning.py
├── postprocess_code.py
├── sanitize.py
├── sudoku_metric.py
└── trip_metric.py
├── eval_instruct
├── .gitignore
├── README.md
├── eval.sh
├── lm_eval
│   ├── __init__.py
│   ├── __main__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── filter.py
│   │   ├── group.py
│   │   ├── instance.py
│   │   ├── metrics.py
│   │   ├── model.py
│   │   ├── registry.py
│   │   ├── samplers.py
│   │   └── task.py
│   ├── caching
│   │   ├── __init__.py
│   │   └── cache.py
│   ├── decontamination
│   │   ├── __init__.py
│   │   ├── archiver.py
│   │   ├── decontaminate.py
│   │   └── janitor.py
│   ├── evaluator.py
│   ├── evaluator_utils.py
│   ├── filters
│   │   ├── __init__.py
│   │   ├── custom.py
│   │   ├── decontamination.py
│   │   ├── extraction.py
│   │   ├── selection.py
│   │   └── transformation.py
│   ├── loggers
│   │   ├── __init__.py
│   │   ├── evaluation_tracker.py
│   │   ├── utils.py
│   │   └── wandb_logger.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── diffllm.py
│   │   ├── dummy.py
│   │   ├── huggingface.py
│   │   └── utils.py
│   ├── prompts
│   │   └── __init__.py
│   ├── tasks
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gpqa
│   │   │   ├── README.md
│   │   │   ├── cot_n_shot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_cot_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_cot_n_shot.yaml
│   │   │   │   ├── gpqa_extended_cot_n_shot.yaml
│   │   │   │   ├── gpqa_main_cot_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── cot_zeroshot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_cot_zeroshot_yaml
│   │   │   │   ├── gpqa_diamond_cot_zeroshot.yaml
│   │   │   │   ├── gpqa_extended_cot_zeroshot.yaml
│   │   │   │   ├── gpqa_main_cot_zeroshot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── generative
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_generative_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_generative_n_shot.yaml
│   │   │   │   ├── gpqa_extended_generative_n_shot.yaml
│   │   │   │   ├── gpqa_main_generative_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── n_shot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_n_shot.yaml
│   │   │   │   ├── gpqa_extended_n_shot.yaml
│   │   │   │   ├── gpqa_main_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   └── zeroshot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_zeroshot_yaml
│   │   │   │   ├── gpqa_diamond_zeroshot.yaml
│   │   │   │   ├── gpqa_extended_zeroshot.yaml
│   │   │   │   ├── gpqa_main_zeroshot.yaml
│   │   │   │   └── utils.py
│   │   ├── gsm8k
│   │   │   ├── README.md
│   │   │   ├── gsm8k-cot-llama.yaml
│   │   │   ├── gsm8k-cot-self-consistency.yaml
│   │   │   ├── gsm8k-cot-zeroshot.yaml
│   │   │   ├── gsm8k-cot.yaml
│   │   │   └── gsm8k.yaml
│   │   ├── humaneval
│   │   │   ├── README.md
│   │   │   ├── humaneval.yaml
│   │   │   ├── humaneval_5.yaml
│   │   │   ├── humaneval_5_instruct.yaml
│   │   │   ├── humaneval_5_instruct_noprefix.yaml
│   │   │   ├── humaneval_64.yaml
│   │   │   ├── humaneval_64_instruct.yaml
│   │   │   ├── humaneval_instruct.yaml
│   │   │   ├── humaneval_instruct_noprefix.yaml
│   │   │   ├── humaneval_plus.yaml
│   │   │   ├── sanitize_utils.py
│   │   │   └── utils.py
│   │   ├── ifeval
│   │   │   ├── README.md
│   │   │   ├── ifeval.yaml
│   │   │   ├── instructions.py
│   │   │   ├── instructions_registry.py
│   │   │   ├── instructions_util.py
│   │   │   └── utils.py
│   │   ├── mbpp
│   │   │   ├── README.md
│   │   │   ├── mbpp.yaml
│   │   │   ├── mbpp_instruct.yaml
│   │   │   ├── mbpp_plus.yaml
│   │   │   ├── mbpp_plus_instruct.yaml
│   │   │   └── utils.py
│   │   ├── minerva_math
│   │   │   ├── README.md
│   │   │   ├── minerva_math_algebra.yaml
│   │   │   ├── minerva_math_counting_and_prob.yaml
│   │   │   ├── minerva_math_geometry.yaml
│   │   │   ├── minerva_math_intermediate_algebra.yaml
│   │   │   ├── minerva_math_num_theory.yaml
│   │   │   ├── minerva_math_prealgebra.yaml
│   │   │   ├── minerva_math_precalc.yaml
│   │   │   └── utils.py
│   │   ├── mmlu
│   │   │   ├── README.md
│   │   │   ├── _generate_configs.py
│   │   │   ├── continuation
│   │   │   │   ├── _continuation_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── default
│   │   │   │   ├── _default_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_humanities.yaml
│   │   │   │   ├── _mmlu_other.yaml
│   │   │   │   ├── _mmlu_social_sciences.yaml
│   │   │   │   ├── _mmlu_stem.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── flan_cot_fewshot
│   │   │   │   ├── _cot_prompts.json
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_flan_cot_fewshot_template_yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── flan_cot_zeroshot
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_flan_cot_zeroshot_template_yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   ├── mmlu_world_religions.yaml
│   │   │   │   └── utils.py
│   │   │   ├── flan_n_shot
│   │   │   │   ├── generative
│   │   │   │   │   ├── _mmlu.yaml
│   │   │   │   │   ├── _mmlu_flan_generative_template_yaml
│   │   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   │   ├── mmlu_management.yaml
│   │   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   │   ├── mmlu_world_religions.yaml
│   │   │   │   │   └── utils.py
│   │   │   │   └── loglikelihood
│   │   │   │   │   ├── _mmlu.yaml
│   │   │   │   │   ├── _mmlu_flan_loglikelihood_template_yaml
│   │   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   │   ├── mmlu_management.yaml
│   │   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   │   └── mmlu_world_religions.yaml
│   │   │   └── generative
│   │   │   │   ├── _default_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   └── mmlu_pro
│   │   │   ├── README.md
│   │   │   ├── _default_template_yaml
│   │   │   ├── _mmlu_pro.yaml
│   │   │   ├── mmlu_pro_biology.yaml
│   │   │   ├── mmlu_pro_business.yaml
│   │   │   ├── mmlu_pro_chemistry.yaml
│   │   │   ├── mmlu_pro_computer_science.yaml
│   │   │   ├── mmlu_pro_economics.yaml
│   │   │   ├── mmlu_pro_engineering.yaml
│   │   │   ├── mmlu_pro_health.yaml
│   │   │   ├── mmlu_pro_history.yaml
│   │   │   ├── mmlu_pro_law.yaml
│   │   │   ├── mmlu_pro_math.yaml
│   │   │   ├── mmlu_pro_other.yaml
│   │   │   ├── mmlu_pro_philosophy.yaml
│   │   │   ├── mmlu_pro_physics.yaml
│   │   │   ├── mmlu_pro_psychology.yaml
│   │   │   └── utils.py
│   └── utils.py
├── pyproject.toml
├── requirements.txt
└── setup.py
└── imgs
└── example_gradio.gif

/eval/eval_dream_mc.sh:
--------------------------------------------------------------------------------
tasks="mmlu arc_easy arc_challenge hellaswag piqa gpqa_main_n_shot winogrande race"
nshots="5 0 0 0 0 5 5 0"
# tasks="mmlu"
# nshots="5"

# Create arrays from space-separated strings
read -ra TASKS_ARRAY <<< "$tasks"
read -ra NSHOTS_ARRAY <<< "$nshots"

# Iterate through the arrays
for i in "${!TASKS_ARRAY[@]}"; do
    output_path=evals_results/${TASKS_ARRAY[$i]}-ns${NSHOTS_ARRAY[$i]}
    echo "Task: ${TASKS_ARRAY[$i]}, Shots: ${NSHOTS_ARRAY[$i]}; Output: $output_path"
    accelerate launch --main_process_port 29510 eval.py --model dream \
        --model_args pretrained=Dream-org/Dream-v0-Base-7B,add_bos_token=true \
        --tasks ${TASKS_ARRAY[$i]} \
        --batch_size 32 \
        --output_path $output_path \
        --num_fewshot ${NSHOTS_ARRAY[$i]} \
        --log_samples \
        --confirm_run_unsafe_code
done

--------------------------------------------------------------------------------
/eval_instruct/.gitignore:
--------------------------------------------------------------------------------
env
*.pyc
output/
output5/
data/
lm_cache
.idea
build
dist
*.egg-info
venv
.venv/
.vscode/
temp
__pycache__
.ipynb_checkpoints
temp
test_logs/
# IPython
profile_default/
ipython_config.py
# don't track (the default location of) the cached requests
lm_eval/caching/.cache
# don't track files created by wandb
wandb
examples/wandb

--------------------------------------------------------------------------------
/eval_instruct/README.md:
--------------------------------------------------------------------------------
# Dream-Instruct Evaluation Toolkit
This toolkit contains the evaluation code used by the Dream-Instruct models.

## Quickstart
To install the toolkit, run:
```
pip install -e ".[ifeval,math]"
```

We provide a script to evaluate [Dream-org/Dream-v0-Instruct-7B](https://huggingface.co/Dream-org/Dream-v0-Instruct-7B):
```
bash eval.sh
```

## Acknowledgement
This is a fork of [EleutherAI/lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main).
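Since the fork keeps the upstream harness layout, individual tasks can also be launched through the standard CLI entry point. A minimal sketch (the `diffllm` model-type name is an assumption based on `lm_eval/models/diffllm.py`; check `eval.sh` for the exact model name and arguments used in practice):
```
python -m lm_eval --model diffllm \
    --model_args pretrained=Dream-org/Dream-v0-Instruct-7B \
    --tasks ifeval \
    --batch_size 8 \
    --output_path output/ifeval
```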

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/__init__.py:
--------------------------------------------------------------------------------
import logging
import os

from .evaluator import evaluate, simple_evaluate


__version__ = "0.4.8"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/api/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/caching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/caching/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/decontamination/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/decontamination/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/filters/__init__.py:
--------------------------------------------------------------------------------
from functools import partial
from typing import List

from lm_eval.api.filter import FilterEnsemble
from lm_eval.api.registry import get_filter

from . import custom, extraction, selection, transformation


def build_filter_ensemble(
    filter_name: str, components: List[List[str]]
) -> FilterEnsemble:
    """
    Create a filtering pipeline.
    """
    filters = []
    for function, kwargs in components:
        if kwargs is None:
            kwargs = {}
        # create a filter given its name in the registry
        f = partial(get_filter(function), **kwargs)
        # add the filter as a pipeline step
        filters.append(f)

    return FilterEnsemble(name=filter_name, filters=filters)

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/filters/custom.py:
--------------------------------------------------------------------------------
from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter


@register_filter("custom")
class CustomFilter(Filter):
    """
    Custom filter that applies a custom, user-defined function to the model responses.
9 | """ 10 | 11 | def __init__(self, **kwargs) -> None: 12 | self.filter_fn = kwargs.pop("filter_fn") 13 | 14 | super().__init__(**kwargs) 15 | 16 | def apply(self, resps, docs): 17 | return self.filter_fn(resps, docs) 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/filters/decontamination.py: -------------------------------------------------------------------------------- 1 | from lm_eval.api.filter import Filter 2 | from lm_eval.api.registry import register_filter 3 | 4 | 5 | @register_filter("decontaminate") 6 | class DecontaminationFilter(Filter): 7 | """ 8 | A filter which evaluates 9 | """ 10 | 11 | name = "track_decontamination" 12 | 13 | def __init__(self, path) -> None: 14 | """ 15 | 16 | TODO: make sure only ever run one time on the train set (should this be cached as a class var? keyed by value for "path"). 17 | should further cache result on a given (task_name, doc_id) 18 | """ 19 | self._decontam_results = None 20 | 21 | def apply(self, resps, docs) -> None: 22 | """ 23 | Return {"no_contamination", "only_contamination"} keys for the 2 different subsets 24 | """ 25 | pass 26 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation_tracker import EvaluationTracker 2 | from .wandb_logger import WandbLogger 3 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | diffllm, 3 | huggingface, 4 | ) 5 | 6 | 7 | # TODO: implement __all__ 8 | 9 | 10 | try: 11 | # enable hf hub transfer if available 12 | import hf_transfer # type: ignore # noqa 13 | import huggingface_hub.constants # type: ignore 14 | 15 | huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True 16 | except ImportError: 17 | pass 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/_generate_configs.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from tqdm import tqdm 3 | 4 | 5 | def main() -> None: 6 | subset = ["extended", "diamond", "main"] 7 | setting = "cot_n_shot" 8 | for task in tqdm(subset): 9 | file_name = f"gpqa_{task}_{setting}.yaml" 10 | try: 11 | with open(f"{file_name}", "w") as f: 12 | f.write("# Generated by _generate_configs.py\n") 13 | yaml.dump( 14 | { 15 | "include": f"_gpqa_{setting}_yaml", 16 | "task": f"gpqa_{task}_{setting}", 17 | "dataset_name": f"gpqa_{task}", 18 | }, 19 | f, 20 | ) 21 | except FileExistsError: 22 | pass 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_diamond_cot_n_shot 5 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | 
include: _gpqa_cot_n_shot_yaml
task: gpqa_extended_cot_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_cot_n_shot_yaml
task: gpqa_main_cot_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "cot_zeroshot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_cot_zeroshot_yaml
task: gpqa_diamond_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_cot_zeroshot_yaml
task: gpqa_extended_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_cot_zeroshot_yaml
task: gpqa_main_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "generative_n_shot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_generative_n_shot_yaml
task: gpqa_diamond_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_generative_n_shot_yaml
task: gpqa_extended_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_generative_n_shot_yaml
task: gpqa_main_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]

    for task in tqdm(subset):
        file_name = f"gpqa_{task}_n_shot.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": "_gpqa_n_shot_yaml",
                        "task": f"gpqa_{task}_n_shot",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/_gpqa_n_shot_yaml:
--------------------------------------------------------------------------------
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: multiple_choice
process_docs: !function utils.process_docs
training_split: train
# Because huggingface dataset only has train split
validation_split: train
test_split: null
description: "Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n"
doc_to_text: "Question: {{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nAnswer:"
doc_to_target: answer
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_n_shot_yaml
task: gpqa_diamond_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_n_shot_yaml
task: gpqa_extended_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_n_shot_yaml
task: gpqa_main_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "zeroshot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/_gpqa_zeroshot_yaml:
--------------------------------------------------------------------------------
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: multiple_choice
process_docs: !function utils.process_docs
training_split: train
# Because huggingface dataset only has train split
validation_split: train
test_split: null
doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nAnswer:"
doc_to_target: answer
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
num_fewshot: 0
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_zeroshot_yaml
task: gpqa_diamond_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_zeroshot_yaml
task: gpqa_extended_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_zeroshot_yaml
task: gpqa_main_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval.yaml:
--------------------------------------------------------------------------------
task: humaneval
dataset_path: openai/openai_humaneval
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "{{prompt}}"
doc_to_target: "{{test}}\ncheck({{entry_point}})"
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: false
repeats: 1
num_fewshot: 0
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_5
repeats: 5
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1,2,3,4,5]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: true
  temperature: 0.2
  top_p: 0.95

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval_5.yaml
task: humaneval_5_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5_instruct_noprefix.yaml:
--------------------------------------------------------------------------------
include: humaneval_5.yaml
task: humaneval_5_instruct_noprefix
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
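# (note) `gen_prefix` is a key added in this fork (upstream lm-eval has no such
# field); it appears to seed the assistant response so that decoding starts
# inside a ```python code block.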
gen_prefix: "```python\n"
generation_kwargs:
  until:
    - "\nassert"
    - "\n# Test"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_64.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_64
repeats: 64
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [2,8,16,32,64]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: true
  temperature: 0.2
  top_p: 0.95

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_64_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval_64.yaml
task: humaneval_64_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_instruct_noprefix.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_instruct_noprefix
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}```"
gen_prefix: "```python\n"
generation_kwargs:
  until:
    - "\nassert"
    - "\n# Test"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_plus.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_plus
dataset_path: evalplus/humanevalplus

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/ifeval/ifeval.yaml:
--------------------------------------------------------------------------------
task: ifeval
dataset_path: google/IFEval
dataset_name: null
output_type: generate_until
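# (note) google/IFEval ships only a "train" split on the Hugging Face Hub,
# which is why it doubles as the test split below.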
test_split: train
num_fewshot: 0
doc_to_text: prompt
doc_to_target: 0
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0.0
  max_gen_toks: 1280
process_results: !function utils.process_results
metric_list:
  - metric: prompt_level_strict_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_strict_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true
  - metric: prompt_level_loose_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_loose_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true
metadata:
  version: 4.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp.yaml:
--------------------------------------------------------------------------------
task: mbpp
dataset_path: google-research-datasets/mbpp
dataset_name: full
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
target_delimiter: ""
metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "[DONE]"
  do_sample: false
num_fewshot: 3
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp_instruct.yaml:
--------------------------------------------------------------------------------
task: mbpp_instruct
dataset_path: google-research-datasets/mbpp
dataset_name: full
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
gen_prefix: "Here is the completed function:\n```python\n"
target_delimiter: ""
metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "```"
  do_sample: false
num_fewshot: 0
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp_plus.yaml:
--------------------------------------------------------------------------------
include: mbpp.yaml
task: mbpp_plus
dataset_path: evalplus/mbppplus
dataset_name: null
doc_to_text: "You are an expert Python programmer, and here is your task: {{prompt if prompt is defined else text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n"

--------------------------------------------------------------------------------
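The `utils.pass_at_1` / `utils.pass_at_k` metrics referenced by the MBPP and HumanEval configs above are conventionally computed with the unbiased pass@k estimator of Chen et al. (2021). A minimal sketch of that estimator (the repo's actual implementation lives in each task's `utils.py`, which is not included in this dump):
```python
from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k: probability that at least one of k samples drawn
    without replacement from n generations is correct, given that c of
    the n generations passed the unit tests."""
    if n - c < k:
        return 1.0  # every size-k draw must contain a correct sample
    return 1.0 - comb(n - c, k) / comb(n, k)

# e.g. humaneval_5 draws 5 samples per problem; with 2 passing, pass@1 = 0.4
assert abs(pass_at_k(5, 2, 1) - 0.4) < 1e-12
```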
/eval_instruct/lm_eval/tasks/mbpp/mbpp_plus_instruct.yaml:
--------------------------------------------------------------------------------
include: mbpp_instruct.yaml
task: mbpp_plus_instruct
dataset_path: evalplus/mbppplus
dataset_name: null
doc_to_text: "You are an expert Python programmer, and here is your task: {{prompt if prompt is defined else text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml:
--------------------------------------------------------------------------------
tag:
  - math_word_problems
task: minerva_math_algebra
dataset_path: EleutherAI/hendrycks_math
process_docs: !function utils.process_docs
dataset_name: algebra
output_type: generate_until
training_split: train
test_split: test
doc_to_text: !function utils.doc_to_text
process_results: !function utils.process_results
doc_to_target: "{{answer if few_shot is undefined else solution}}"
generation_kwargs:
  until:
    - "Problem:"
  do_sample: false
  temperature: 0
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
  - metric: math_verify
    aggregation: mean
    higher_is_better: true
num_fewshot: 4
metadata:
  version: 2.0
dataset_kwargs:
  trust_remote_code: true
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: counting_and_probability
task: minerva_math_counting_and_prob

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_geometry.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: geometry
task: minerva_math_geometry

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: intermediate_algebra
task: minerva_math_intermediate_algebra

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: number_theory
task: minerva_math_num_theory

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: prealgebra
task: minerva_math_prealgebra

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_precalc.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: precalculus
task: minerva_math_precalc

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/_continuation_template_yaml:
--------------------------------------------------------------------------------
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
output_type: multiple_choice
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
doc_to_text: "Question: {{question.strip()}}\nAnswer:"
doc_to_choice: "{{choices}}"
doc_to_target: "{{answer}}"
metadata:
  version: 1.0
dataset_kwargs:
  trust_remote_code: true

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/_mmlu.yaml:
--------------------------------------------------------------------------------
group: mmlu_continuation
group_alias: mmlu (continuation)
task:
  - group: stem
    task:
      - mmlu_continuation_stem
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: other
    task:
      - mmlu_continuation_other
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: social sciences
    task:
      - mmlu_continuation_social_sciences
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: humanities
    task:
      - mmlu_continuation_humanities
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 2

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
"dataset_name": "abstract_algebra"
"description": "The following are questions (with answers) about abstract\
  \ algebra.\n\n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_abstract_algebra"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
"dataset_name": "anatomy"
"description": "The following are questions (with answers) about anatomy.\n\
  \n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_anatomy"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
"dataset_name": "astronomy"
"description": "The following are questions (with answers) about astronomy.\n\
  \n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_astronomy"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "business_ethics"
"description": "The following are questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_formal_logic" 7 | -------------------------------------------------------------------------------- 
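Each subject file in this directory is a thin wrapper: it sets only `dataset_name`, the prompt `description`, and a grouping `tag`, and inherits everything else from `_continuation_template_yaml` via `include`. A minimal sketch of how such an include chain can be resolved, assuming a simple recursive shallow merge (an illustration of the pattern, not lm-eval's actual loader):

    import yaml

    def load_task_config(path):
        """Load a task YAML, resolving `include:` so the child's keys override the template's."""
        with open(path) as f:
            cfg = yaml.safe_load(f)
        if "include" in cfg:
            # e.g. "_continuation_template_yaml", resolved here relative to the working directory
            base = load_task_config(cfg.pop("include"))
            base.update(cfg)  # subject-level keys win over the shared template
            cfg = base
        return cfg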
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school government and politics.\n\n" 
4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_statistics" 7 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_logical_fallacies" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_scenarios" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": 
"mmlu_continuation_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split 2 | test_split: test 3 | fewshot_split: dev 4 | fewshot_config: 5 | sampler: first_n 6 | output_type: multiple_choice 7 | doc_to_text: 
"{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" 8 | doc_to_choice: ["A", "B", "C", "D"] 9 | doc_to_target: answer 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | metadata: 15 | version: 1.0 16 | dataset_kwargs: 17 | trust_remote_code: true 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu 2 | task: 3 | - mmlu_stem 4 | - mmlu_other 5 | - mmlu_social_sciences 6 | - mmlu_humanities 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | metadata: 11 | version: 2 12 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_humanities 2 | group_alias: humanities 3 | task: 4 | - mmlu_humanities_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_other.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_other 2 | group_alias: other 3 | task: 4 | - mmlu_other_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_social_sciences 2 | group_alias: social sciences 3 | task: 4 | - mmlu_social_sciences_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_stem.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_stem 2 | group_alias: stem 3 | task: 4 | - mmlu_stem_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_abstract_algebra" 7 | "task_alias": "abstract_algebra" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_business_ethics" 7 | "task_alias": "business_ethics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_clinical_knowledge" 7 | "task_alias": "clinical_knowledge" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_chemistry" 7 | "task_alias": "college_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_computer_science" 7 | "task_alias": "college_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | 
"include": "_default_template_yaml" 6 | "task": "mmlu_college_mathematics" 7 | "task_alias": "college_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_medicine" 7 | "task_alias": "college_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_computer_security" 7 | "task_alias": "computer_security" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_conceptual_physics" 7 | "task_alias": "conceptual_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_electrical_engineering" 7 | "task_alias": "electrical_engineering" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_elementary_mathematics" 7 | "task_alias": "elementary_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_biology" 7 | "task_alias": "high_school_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_chemistry" 7 | "task_alias": "high_school_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_computer_science" 7 | "task_alias": "high_school_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 
4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_european_history" 7 | "task_alias": "high_school_european_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_geography" 7 | "task_alias": "high_school_geography" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_government_and_politics" 7 | "task_alias": "high_school_government_and_politics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_macroeconomics" 7 | "task_alias": "high_school_macroeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_mathematics" 7 | "task_alias": "high_school_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_microeconomics" 7 | "task_alias": "high_school_microeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": 
"mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_physics" 7 | "task_alias": "high_school_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_psychology" 7 | "task_alias": "high_school_psychology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_statistics" 7 | "task_alias": "high_school_statistics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_us_history" 7 | "task_alias": "high_school_us_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_world_history" 7 | "task_alias": "high_school_world_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_aging" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_sexuality" 7 | "task_alias": "human_sexuality" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_international_law" 7 | "task_alias": "international_law" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_jurisprudence" 7 | "task_alias": "jurisprudence" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_logical_fallacies" 7 | "task_alias": "logical_fallacies" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_machine_learning" 7 | "task_alias": "machine_learning" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_management" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_medical_genetics" 7 | 
"task_alias": "medical_genetics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_disputes" 7 | "task_alias": "moral_disputes" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_scenarios" 7 | "task_alias": "moral_scenarios" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": 
"mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_accounting" 7 | "task_alias": "professional_accounting" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_law" 7 | "task_alias": "professional_law" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_medicine" 7 | "task_alias": "professional_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_psychology" 7 | "task_alias": "professional_psychology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_public_relations" 7 | "task_alias": "public_relations" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_security_studies" 7 | "task_alias": "security_studies" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_us_foreign_policy" 7 | "task_alias": "us_foreign_policy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_world_religions" 7 | "task_alias": "world_religions" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_cot_fewshot 2 | group_alias: mmlu (flan style, fewshot cot) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_cot_fewshot_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_cot_fewshot_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_cot_fewshot_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_cot_fewshot_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - aggregation: mean 30 | metric: exact_match 31 | weight_by_size: True 32 | filter_list: get-answer 33 | metadata: 34 | version: 2 35 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_cot_zeroshot 2 | group_alias: mmlu (flan style, zeroshot cot) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_cot_zeroshot_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_cot_zeroshot_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_cot_zeroshot_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_cot_zeroshot_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | 
version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": 
"The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | 
"task": "mmlu_flan_cot_zeroshot_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- 
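
The `_mmlu.yaml` group files earlier in this tree set `weight_by_size: True` on each `aggregate_metric_list` entry. The intent is that a category score (and the overall MMLU score) is the mean of per-subject scores weighted by subject test-set size, rather than a plain average of subjects. A small sketch of that computation, with made-up placeholder counts and scores:

```python
# What `weight_by_size: True` denotes: size-weighted mean over subtasks.
def weighted_aggregate(results):
    """results: list of (metric_value, n_examples) pairs, one per subtask."""
    total = sum(n for _, n in results)
    return sum(v * n for v, n in results) / total

# Hypothetical (acc, test-set size) triples for three STEM subjects.
stem = [(0.42, 100), (0.55, 152), (0.31, 270)]
print(f"stem acc (size-weighted): {weighted_aggregate(stem):.4f}")
```

Without the weighting, small subjects (e.g. 100-question subsets) would count as much as large ones (e.g. professional_law's ~1,500 questions) in the aggregate.
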
/eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_us_history" 7 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | 
"description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_moral_scenarios" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | 
"dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": 
"_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_n_shot_generative 2 | group_alias: mmlu (flan style, generative) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_n_shot_generative_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_n_shot_generative_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_n_shot_generative_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_n_shot_generative_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | 
"description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are 
multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ 
law.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_medical_genetics" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_moral_scenarios" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 
1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are 
multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_n_shot_loglikelihood 2 | group_alias: mmlu (flan style, loglikelihood) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_n_shot_loglikelihood_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_n_shot_loglikelihood_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_n_shot_loglikelihood_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_n_shot_loglikelihood_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split 2 | test_split: test 3 | fewshot_split: dev 4 | fewshot_config: 5 | sampler: first_n 6 | output_type: multiple_choice 7 | doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) 
{{choices[3]}}\nA:" 8 | doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] 9 | doc_to_target: answer 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | metadata: 15 | version: 2.0 16 | dataset_kwargs: 17 | trust_remote_code: true 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": 
"_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": 
"mmlu_flan_n_shot_loglikelihood_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_global_facts" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | 
"description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The 
following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": 
"_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_moral_scenarios" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_accounting" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_sociology" 7 | 
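The `_mmlu.yaml` group file earlier in this directory rolls the per-subject accuracies up through the four category subgroups with `weight_by_size: True`. A plausible reading of that flag, sketched here as an assumption rather than the harness's actual aggregation code, is a document-count-weighted mean:

```python
def aggregate_acc(subtask_results: list[dict]) -> float:
    # Average subtask accuracies weighted by the number of documents,
    # i.e. what `weight_by_size: True` is presumed to mean here.
    total = sum(r["size"] for r in subtask_results)
    return sum(r["acc"] * r["size"] for r in subtask_results) / total

# Two hypothetical subtasks: (0.8 * 100 + 0.5 * 300) / 400 = 0.575
print(aggregate_acc([{"acc": 0.8, "size": 100}, {"acc": 0.5, "size": 300}]))
```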
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_abstract_algebra_generative" 7 | "task_alias": "abstract_algebra" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy_generative" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy_generative" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 
| "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_business_ethics_generative" 7 | "task_alias": "business_ethics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_clinical_knowledge_generative" 7 | "task_alias": "clinical_knowledge" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology_generative" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_chemistry_generative" 7 | "task_alias": "college_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_computer_science_generative" 7 | "task_alias": "college_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_mathematics_generative" 7 | "task_alias": "college_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": 
"_default_template_yaml" 6 | "task": "mmlu_college_medicine_generative" 7 | "task_alias": "college_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics_generative" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_computer_security_generative" 7 | "task_alias": "computer_security" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_conceptual_physics_generative" 7 | "task_alias": "conceptual_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics_generative" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_electrical_engineering_generative" 7 | "task_alias": "electrical_engineering" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_elementary_mathematics_generative" 7 | "task_alias": "elementary_mathematics" 8 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic_generative" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts_generative" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_biology_generative" 7 | "task_alias": "high_school_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_chemistry_generative" 7 | "task_alias": "high_school_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_computer_science_generative" 7 | "task_alias": "high_school_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_european_history_generative" 7 | "task_alias": "high_school_european_history" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_geography_generative" 7 | "task_alias": "high_school_geography" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_government_and_politics_generative" 7 | "task_alias": "high_school_government_and_politics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_macroeconomics_generative" 7 | "task_alias": "high_school_macroeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_mathematics_generative" 7 | "task_alias": "high_school_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_microeconomics_generative" 7 | "task_alias": "high_school_microeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_physics_generative" 7 | "task_alias": "high_school_physics" 8 | 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_psychology"
"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_psychology_generative"
"task_alias": "high_school_psychology"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_statistics"
"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n"
"tag": "mmlu_stem_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_statistics_generative"
"task_alias": "high_school_statistics"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_us_history"
"description": "The following are multiple choice questions (with answers) about high school us history.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_us_history_generative"
"task_alias": "high_school_us_history"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_world_history"
"description": "The following are multiple choice questions (with answers) about high school world history.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_world_history_generative"
"task_alias": "high_school_world_history"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
"dataset_name": "human_aging"
"description": "The following are multiple choice questions (with answers) about human aging.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_human_aging_generative"
"task_alias": "human_aging"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
"dataset_name": "human_sexuality"
"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_human_sexuality_generative"
"task_alias": "human_sexuality"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
"dataset_name": "international_law"
"description": "The following are multiple choice questions (with answers) about international law.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_international_law_generative"
"task_alias": "international_law"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
"dataset_name": "jurisprudence"
"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_jurisprudence_generative"
"task_alias": "jurisprudence"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
"dataset_name": "logical_fallacies"
"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_logical_fallacies_generative"
"task_alias": "logical_fallacies"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
"dataset_name": "machine_learning"
"description": "The following are multiple choice questions (with answers) about machine learning.\n\n"
"tag": "mmlu_stem_generative"
"include": "_default_template_yaml"
"task": "mmlu_machine_learning_generative"
"task_alias": "machine_learning"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
"dataset_name": "management"
"description": "The following are multiple choice questions (with answers) about management.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_management_generative"
"task_alias": "management"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
"dataset_name": "marketing"
"description": "The following are multiple choice questions (with answers) about marketing.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_marketing_generative"
"task_alias": "marketing"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "medical_genetics"
"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n"
genetics.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_medical_genetics_generative" 7 | "task_alias": "medical_genetics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous_generative" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_disputes_generative" 7 | "task_alias": "moral_disputes" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_scenarios_generative" 7 | "task_alias": "moral_scenarios" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition_generative" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy_generative" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory_generative" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml: 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_accounting"
"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_accounting_generative"
"task_alias": "professional_accounting"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_law"
"description": "The following are multiple choice questions (with answers) about professional law.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_law_generative"
"task_alias": "professional_law"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_medicine"
"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_medicine_generative"
"task_alias": "professional_medicine"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_psychology"
"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_psychology_generative"
"task_alias": "professional_psychology"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
"dataset_name": "public_relations"
"description": "The following are multiple choice questions (with answers) about public relations.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_public_relations_generative"
"task_alias": "public_relations"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
"dataset_name": "security_studies"
"description": "The following are multiple choice questions (with answers) about security studies.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_security_studies_generative"
"task_alias": "security_studies"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "sociology"
"description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology_generative" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_us_foreign_policy_generative" 7 | "task_alias": "us_foreign_policy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology_generative" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_world_religions_generative" 7 | "task_alias": "world_religions" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu_pro/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: TIGER-Lab/MMLU-Pro 2 | test_split: test 3 | fewshot_split: validation 4 | fewshot_config: 5 | sampler: first_n 6 | doc_to_text: !function utils.fewshot_to_text 7 | doc_to_target: "" 8 | output_type: generate_until 9 | doc_to_text: !function utils.doc_to_text 10 | doc_to_target: answer 11 | filter_list: 12 | - name: "custom-extract" 13 | filter: 14 | - function: "regex" 15 | regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?' 
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/_mmlu_pro.yaml:
--------------------------------------------------------------------------------
group: mmlu_pro
task:
  - mmlu_pro_biology
  - mmlu_pro_business
  - mmlu_pro_chemistry
  - mmlu_pro_computer_science
  - mmlu_pro_economics
  - mmlu_pro_engineering
  - mmlu_pro_health
  - mmlu_pro_history
  - mmlu_pro_law
  - mmlu_pro_math
  - mmlu_pro_other
  - mmlu_pro_philosophy
  - mmlu_pro_physics
  - mmlu_pro_psychology
aggregate_metric_list:
  - aggregation: mean
    metric: exact_match
    weight_by_size: true
    filter_list: custom-extract
metadata:
  version: 2.0
--------------------------------------------------------------------------------
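With the group file in place, the whole 14-subtask suite can be launched under the single name mmlu_pro. A sketch using the harness's Python entry point; the model id is a placeholder, and this repo's own diffllm backend (lm_eval/models/diffllm.py) may be the intended model type rather than plain "hf":

import lm_eval

# Placeholder checkpoint; swap in the model you are actually evaluating.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=Dream-org/Dream-v0-Instruct-7B",
    tasks=["mmlu_pro"],  # expands to all 14 subtasks via the group file above
)
print(results["results"])  # per-task and aggregated exact_match scores
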
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about biology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_biology"
task_alias: "biology"
process_docs: !function utils.process_biology
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about business. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_business"
task_alias: "business"
process_docs: !function utils.process_business
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about chemistry. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_chemistry"
task_alias: "chemistry"
process_docs: !function utils.process_chemistry
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about computer science. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_computer_science"
task_alias: "computer_science"
process_docs: !function utils.process_computer_science
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about economics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_economics"
task_alias: "economics"
process_docs: !function utils.process_economics
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about engineering. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_engineering"
task_alias: "engineering"
process_docs: !function utils.process_engineering
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about health. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_health"
task_alias: "health"
process_docs: !function utils.process_health
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about history. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_history"
task_alias: "history"
process_docs: !function utils.process_history
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_law"
task_alias: "law"
process_docs: !function utils.process_law
--------------------------------------------------------------------------------
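Each subject file binds process_docs to a utils.process_* hook. utils.py itself is not reproduced in this listing, but since MMLU-Pro ships as one dataset with a per-row category column, a plausible sketch of what such hooks do is a per-subject filter (hypothetical code, assuming a Hugging Face datasets.Dataset input):

from functools import partial

def _filter_by_category(dataset, category):
    # Hypothetical: keep only rows whose "category" column matches the subject.
    return dataset.filter(lambda doc: doc["category"] == category)

# One hook per subject, e.g. the process_law hook bound just above:
process_law = partial(_filter_by_category, category="law")
process_math = partial(_filter_by_category, category="math")
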
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about math. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_math"
task_alias: "math"
process_docs: !function utils.process_math
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_other"
task_alias: "other"
process_docs: !function utils.process_other
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about philosophy. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_philosophy"
task_alias: "philosophy"
process_docs: !function utils.process_philosophy
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about physics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_physics"
task_alias: "physics"
process_docs: !function utils.process_physics
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about psychology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_psychology"
task_alias: "psychology"
process_docs: !function utils.process_psychology
--------------------------------------------------------------------------------
/eval_instruct/requirements.txt:
--------------------------------------------------------------------------------
-e .
--------------------------------------------------------------------------------
/eval_instruct/setup.py:
--------------------------------------------------------------------------------
import setuptools


# This is to make sure that the package supports editable installs
setuptools.setup()
--------------------------------------------------------------------------------
/imgs/example_gradio.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/imgs/example_gradio.gif
--------------------------------------------------------------------------------