├── LICENSE
├── README.md
├── app.py
├── demo_batch_completion.py
├── demo_completion.py
├── demo_multiturn_chat.py
├── demo_token_control.py
├── eval
├── cd_metric.py
├── data
│   ├── cd3_test.jsonl
│   ├── cd4_test.jsonl
│   ├── cd5_test.jsonl
│   ├── sudoku_4x4_10.jsonl
│   ├── sudoku_4x4_11.jsonl
│   ├── sudoku_4x4_12.jsonl
│   ├── sudoku_4x4_4.jsonl
│   ├── sudoku_4x4_5.jsonl
│   ├── sudoku_4x4_6.jsonl
│   ├── sudoku_4x4_7.jsonl
│   ├── sudoku_4x4_8.jsonl
│   ├── sudoku_4x4_9.jsonl
│   └── trip_planning.json
├── eval.py
├── eval_dream_gen.sh
├── eval_dream_gen_planning.sh
├── eval_dream_mc.sh
├── eval_planning.py
├── postprocess_code.py
├── sanitize.py
├── sudoku_metric.py
└── trip_metric.py
├── eval_instruct
├── .gitignore
├── README.md
├── eval.sh
├── lm_eval
│   ├── __init__.py
│   ├── __main__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── filter.py
│   │   ├── group.py
│   │   ├── instance.py
│   │   ├── metrics.py
│   │   ├── model.py
│   │   ├── registry.py
│   │   ├── samplers.py
│   │   └── task.py
│   ├── caching
│   │   ├── __init__.py
│   │   └── cache.py
│   ├── decontamination
│   │   ├── __init__.py
│   │   ├── archiver.py
│   │   ├── decontaminate.py
│   │   └── janitor.py
│   ├── evaluator.py
│   ├── evaluator_utils.py
│   ├── filters
│   │   ├── __init__.py
│   │   ├── custom.py
│   │   ├── decontamination.py
│   │   ├── extraction.py
│   │   ├── selection.py
│   │   └── transformation.py
│   ├── loggers
│   │   ├── __init__.py
│   │   ├── evaluation_tracker.py
│   │   ├── utils.py
│   │   └── wandb_logger.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── diffllm.py
│   │   ├── dummy.py
│   │   ├── huggingface.py
│   │   └── utils.py
│   ├── prompts
│   │   └── __init__.py
│   ├── tasks
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gpqa
│   │   │   ├── README.md
│   │   │   ├── cot_n_shot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_cot_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_cot_n_shot.yaml
│   │   │   │   ├── gpqa_extended_cot_n_shot.yaml
│   │   │   │   ├── gpqa_main_cot_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── cot_zeroshot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_cot_zeroshot_yaml
│   │   │   │   ├── gpqa_diamond_cot_zeroshot.yaml
│   │   │   │   ├── gpqa_extended_cot_zeroshot.yaml
│   │   │   │   ├── gpqa_main_cot_zeroshot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── generative
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_generative_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_generative_n_shot.yaml
│   │   │   │   ├── gpqa_extended_generative_n_shot.yaml
│   │   │   │   ├── gpqa_main_generative_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   ├── n_shot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_n_shot_yaml
│   │   │   │   ├── gpqa_diamond_n_shot.yaml
│   │   │   │   ├── gpqa_extended_n_shot.yaml
│   │   │   │   ├── gpqa_main_n_shot.yaml
│   │   │   │   └── utils.py
│   │   │   └── zeroshot
│   │   │   │   ├── _generate_configs.py
│   │   │   │   ├── _gpqa_zeroshot_yaml
│   │   │   │   ├── gpqa_diamond_zeroshot.yaml
│   │   │   │   ├── gpqa_extended_zeroshot.yaml
│   │   │   │   ├── gpqa_main_zeroshot.yaml
│   │   │   │   └── utils.py
│   │   ├── gsm8k
│   │   │   ├── README.md
│   │   │   ├── gsm8k-cot-llama.yaml
│   │   │   ├── gsm8k-cot-self-consistency.yaml
│   │   │   ├── gsm8k-cot-zeroshot.yaml
│   │   │   ├── gsm8k-cot.yaml
│   │   │   └── gsm8k.yaml
│   │   ├── humaneval
│   │   │   ├── README.md
│   │   │   ├── humaneval.yaml
│   │   │   ├── humaneval_5.yaml
│   │   │   ├── humaneval_5_instruct.yaml
│   │   │   ├── humaneval_5_instruct_noprefix.yaml
│   │   │   ├── humaneval_64.yaml
│   │   │   ├── humaneval_64_instruct.yaml
│   │   │   ├── humaneval_instruct.yaml
│   │   │   ├── humaneval_instruct_noprefix.yaml
│   │   │   ├── humaneval_plus.yaml
│   │   │   ├── sanitize_utils.py
│   │   │   └── utils.py
│   │   ├── ifeval
│   │   │   ├── README.md
│   │   │   ├── ifeval.yaml
│   │   │   ├── instructions.py
│   │   │   ├── instructions_registry.py
│   │   │   ├── instructions_util.py
│   │   │   └── utils.py
│   │   ├── mbpp
│   │   │   ├── README.md
│   │   │   ├── mbpp.yaml
│   │   │   ├── mbpp_instruct.yaml
│   │   │   ├── mbpp_plus.yaml
│   │   │   ├── mbpp_plus_instruct.yaml
│   │   │   └── utils.py
│   │   ├── minerva_math
│   │   │   ├── README.md
│   │   │   ├── minerva_math_algebra.yaml
│   │   │   ├── minerva_math_counting_and_prob.yaml
│   │   │   ├── minerva_math_geometry.yaml
│   │   │   ├── minerva_math_intermediate_algebra.yaml
│   │   │   ├── minerva_math_num_theory.yaml
│   │   │   ├── minerva_math_prealgebra.yaml
│   │   │   ├── minerva_math_precalc.yaml
│   │   │   └── utils.py
│   │   ├── mmlu
│   │   │   ├── README.md
│   │   │   ├── _generate_configs.py
│   │   │   ├── continuation
│   │   │   │   ├── _continuation_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── default
│   │   │   │   ├── _default_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_humanities.yaml
│   │   │   │   ├── _mmlu_other.yaml
│   │   │   │   ├── _mmlu_social_sciences.yaml
│   │   │   │   ├── _mmlu_stem.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── flan_cot_fewshot
│   │   │   │   ├── _cot_prompts.json
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_flan_cot_fewshot_template_yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   │   ├── flan_cot_zeroshot
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── _mmlu_flan_cot_zeroshot_template_yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   ├── mmlu_world_religions.yaml
│   │   │   │   └── utils.py
│   │   │   ├── flan_n_shot
│   │   │   │   ├── generative
│   │   │   │   │   ├── _mmlu.yaml
│   │   │   │   │   ├── _mmlu_flan_generative_template_yaml
│   │   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   │   ├── mmlu_management.yaml
│   │   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   │   ├── mmlu_world_religions.yaml
│   │   │   │   │   └── utils.py
│   │   │   │   └── loglikelihood
│   │   │   │   │   ├── _mmlu.yaml
│   │   │   │   │   ├── _mmlu_flan_loglikelihood_template_yaml
│   │   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   │   ├── mmlu_management.yaml
│   │   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   │   └── mmlu_world_religions.yaml
│   │   │   └── generative
│   │   │   │   ├── _default_template_yaml
│   │   │   │   ├── _mmlu.yaml
│   │   │   │   ├── mmlu_abstract_algebra.yaml
│   │   │   │   ├── mmlu_anatomy.yaml
│   │   │   │   ├── mmlu_astronomy.yaml
│   │   │   │   ├── mmlu_business_ethics.yaml
│   │   │   │   ├── mmlu_clinical_knowledge.yaml
│   │   │   │   ├── mmlu_college_biology.yaml
│   │   │   │   ├── mmlu_college_chemistry.yaml
│   │   │   │   ├── mmlu_college_computer_science.yaml
│   │   │   │   ├── mmlu_college_mathematics.yaml
│   │   │   │   ├── mmlu_college_medicine.yaml
│   │   │   │   ├── mmlu_college_physics.yaml
│   │   │   │   ├── mmlu_computer_security.yaml
│   │   │   │   ├── mmlu_conceptual_physics.yaml
│   │   │   │   ├── mmlu_econometrics.yaml
│   │   │   │   ├── mmlu_electrical_engineering.yaml
│   │   │   │   ├── mmlu_elementary_mathematics.yaml
│   │   │   │   ├── mmlu_formal_logic.yaml
│   │   │   │   ├── mmlu_global_facts.yaml
│   │   │   │   ├── mmlu_high_school_biology.yaml
│   │   │   │   ├── mmlu_high_school_chemistry.yaml
│   │   │   │   ├── mmlu_high_school_computer_science.yaml
│   │   │   │   ├── mmlu_high_school_european_history.yaml
│   │   │   │   ├── mmlu_high_school_geography.yaml
│   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
│   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
│   │   │   │   ├── mmlu_high_school_mathematics.yaml
│   │   │   │   ├── mmlu_high_school_microeconomics.yaml
│   │   │   │   ├── mmlu_high_school_physics.yaml
│   │   │   │   ├── mmlu_high_school_psychology.yaml
│   │   │   │   ├── mmlu_high_school_statistics.yaml
│   │   │   │   ├── mmlu_high_school_us_history.yaml
│   │   │   │   ├── mmlu_high_school_world_history.yaml
│   │   │   │   ├── mmlu_human_aging.yaml
│   │   │   │   ├── mmlu_human_sexuality.yaml
│   │   │   │   ├── mmlu_international_law.yaml
│   │   │   │   ├── mmlu_jurisprudence.yaml
│   │   │   │   ├── mmlu_logical_fallacies.yaml
│   │   │   │   ├── mmlu_machine_learning.yaml
│   │   │   │   ├── mmlu_management.yaml
│   │   │   │   ├── mmlu_marketing.yaml
│   │   │   │   ├── mmlu_medical_genetics.yaml
│   │   │   │   ├── mmlu_miscellaneous.yaml
│   │   │   │   ├── mmlu_moral_disputes.yaml
│   │   │   │   ├── mmlu_moral_scenarios.yaml
│   │   │   │   ├── mmlu_nutrition.yaml
│   │   │   │   ├── mmlu_philosophy.yaml
│   │   │   │   ├── mmlu_prehistory.yaml
│   │   │   │   ├── mmlu_professional_accounting.yaml
│   │   │   │   ├── mmlu_professional_law.yaml
│   │   │   │   ├── mmlu_professional_medicine.yaml
│   │   │   │   ├── mmlu_professional_psychology.yaml
│   │   │   │   ├── mmlu_public_relations.yaml
│   │   │   │   ├── mmlu_security_studies.yaml
│   │   │   │   ├── mmlu_sociology.yaml
│   │   │   │   ├── mmlu_us_foreign_policy.yaml
│   │   │   │   ├── mmlu_virology.yaml
│   │   │   │   └── mmlu_world_religions.yaml
│   │   └── mmlu_pro
│   │   │   ├── README.md
│   │   │   ├── _default_template_yaml
│   │   │   ├── _mmlu_pro.yaml
│   │   │   ├── mmlu_pro_biology.yaml
│   │   │   ├── mmlu_pro_business.yaml
│   │   │   ├── mmlu_pro_chemistry.yaml
│   │   │   ├── mmlu_pro_computer_science.yaml
│   │   │   ├── mmlu_pro_economics.yaml
│   │   │   ├── mmlu_pro_engineering.yaml
│   │   │   ├── mmlu_pro_health.yaml
│   │   │   ├── mmlu_pro_history.yaml
│   │   │   ├── mmlu_pro_law.yaml
│   │   │   ├── mmlu_pro_math.yaml
│   │   │   ├── mmlu_pro_other.yaml
│   │   │   ├── mmlu_pro_philosophy.yaml
│   │   │   ├── mmlu_pro_physics.yaml
│   │   │   ├── mmlu_pro_psychology.yaml
│   │   │   └── utils.py
│   └── utils.py
├── pyproject.toml
├── requirements.txt
└── setup.py
└── imgs
└── example_gradio.gif

/eval/eval_dream_mc.sh:
--------------------------------------------------------------------------------
tasks="mmlu arc_easy arc_challenge hellaswag piqa gpqa_main_n_shot winogrande race"
nshots="5 0 0 0 0 5 5 0"
# tasks="mmlu"
# nshots="5"

# Create arrays from space-separated strings
read -ra TASKS_ARRAY <<< "$tasks"
read -ra NSHOTS_ARRAY <<< "$nshots"

# Iterate through the arrays
for i in "${!TASKS_ARRAY[@]}"; do
    output_path=evals_results/${TASKS_ARRAY[$i]}-ns${NSHOTS_ARRAY[$i]}
    echo "Task: ${TASKS_ARRAY[$i]}, Shots: ${NSHOTS_ARRAY[$i]}; Output: $output_path"
    accelerate launch --main_process_port 29510 eval.py --model dream \
        --model_args pretrained=Dream-org/Dream-v0-Base-7B,add_bos_token=true \
        --tasks ${TASKS_ARRAY[$i]} \
        --batch_size 32 \
        --output_path $output_path \
        --num_fewshot ${NSHOTS_ARRAY[$i]} \
        --log_samples \
        --confirm_run_unsafe_code
done

--------------------------------------------------------------------------------
/eval_instruct/.gitignore:
--------------------------------------------------------------------------------
env
*.pyc
output/
output5/
data/
lm_cache
.idea
build
dist
*.egg-info
venv
.venv/
.vscode/
temp
__pycache__
.ipynb_checkpoints
temp
test_logs/
# IPython
profile_default/
ipython_config.py
# don't track (the default location of) the cached requests
lm_eval/caching/.cache
# don't track files created by wandb
wandb
examples/wandb

--------------------------------------------------------------------------------
/eval_instruct/README.md:
--------------------------------------------------------------------------------
# Dream-Instruct Evaluation Toolkit
This toolkit contains the evaluation code used by the Dream-Instruct models.

## Quickstart
To install the toolkit, run:
```
pip install -e ".[ifeval,math]"
```

We provide a script to evaluate [Dream-org/Dream-v0-Instruct-7B](https://huggingface.co/Dream-org/Dream-v0-Instruct-7B):
```
bash eval.sh
```

## Acknowledgement
This is a fork of [EleutherAI/lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main).
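Since the fork keeps the upstream harness layout, individual tasks can also be launched through the standard CLI entry point. A minimal sketch (the `diffllm` model-type name is an assumption based on `lm_eval/models/diffllm.py`; check `eval.sh` for the exact model name and arguments used in practice):
```
python -m lm_eval --model diffllm \
    --model_args pretrained=Dream-org/Dream-v0-Instruct-7B \
    --tasks ifeval \
    --batch_size 8 \
    --output_path output/ifeval
```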

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/__init__.py:
--------------------------------------------------------------------------------
import logging
import os

from .evaluator import evaluate, simple_evaluate


__version__ = "0.4.8"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/api/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/caching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/caching/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/decontamination/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/eval_instruct/lm_eval/decontamination/__init__.py
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/filters/__init__.py:
--------------------------------------------------------------------------------
from functools import partial
from typing import List

from lm_eval.api.filter import FilterEnsemble
from lm_eval.api.registry import get_filter

from . import custom, extraction, selection, transformation


def build_filter_ensemble(
    filter_name: str, components: List[List[str]]
) -> FilterEnsemble:
    """
    Create a filtering pipeline.
    """
    filters = []
    for function, kwargs in components:
        if kwargs is None:
            kwargs = {}
        # create a filter given its name in the registry
        f = partial(get_filter(function), **kwargs)
        # add the filter as a pipeline step
        filters.append(f)

    return FilterEnsemble(name=filter_name, filters=filters)

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/filters/custom.py:
--------------------------------------------------------------------------------
from lm_eval.api.filter import Filter
from lm_eval.api.registry import register_filter


@register_filter("custom")
class CustomFilter(Filter):
    """
    Custom filter that applies a custom, user-defined function to the model responses.
9 | """ 10 | 11 | def __init__(self, **kwargs) -> None: 12 | self.filter_fn = kwargs.pop("filter_fn") 13 | 14 | super().__init__(**kwargs) 15 | 16 | def apply(self, resps, docs): 17 | return self.filter_fn(resps, docs) 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/filters/decontamination.py: -------------------------------------------------------------------------------- 1 | from lm_eval.api.filter import Filter 2 | from lm_eval.api.registry import register_filter 3 | 4 | 5 | @register_filter("decontaminate") 6 | class DecontaminationFilter(Filter): 7 | """ 8 | A filter which evaluates 9 | """ 10 | 11 | name = "track_decontamination" 12 | 13 | def __init__(self, path) -> None: 14 | """ 15 | 16 | TODO: make sure only ever run one time on the train set (should this be cached as a class var? keyed by value for "path"). 17 | should further cache result on a given (task_name, doc_id) 18 | """ 19 | self._decontam_results = None 20 | 21 | def apply(self, resps, docs) -> None: 22 | """ 23 | Return {"no_contamination", "only_contamination"} keys for the 2 different subsets 24 | """ 25 | pass 26 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation_tracker import EvaluationTracker 2 | from .wandb_logger import WandbLogger 3 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ( 2 | diffllm, 3 | huggingface, 4 | ) 5 | 6 | 7 | # TODO: implement __all__ 8 | 9 | 10 | try: 11 | # enable hf hub transfer if available 12 | import hf_transfer # type: ignore # noqa 13 | import huggingface_hub.constants # type: ignore 14 | 15 | huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True 16 | except ImportError: 17 | pass 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/_generate_configs.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from tqdm import tqdm 3 | 4 | 5 | def main() -> None: 6 | subset = ["extended", "diamond", "main"] 7 | setting = "cot_n_shot" 8 | for task in tqdm(subset): 9 | file_name = f"gpqa_{task}_{setting}.yaml" 10 | try: 11 | with open(f"{file_name}", "w") as f: 12 | f.write("# Generated by _generate_configs.py\n") 13 | yaml.dump( 14 | { 15 | "include": f"_gpqa_{setting}_yaml", 16 | "task": f"gpqa_{task}_{setting}", 17 | "dataset_name": f"gpqa_{task}", 18 | }, 19 | f, 20 | ) 21 | except FileExistsError: 22 | pass 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_diamond_cot_n_shot 5 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | 
include: _gpqa_cot_n_shot_yaml
task: gpqa_extended_cot_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_cot_n_shot_yaml
task: gpqa_main_cot_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "cot_zeroshot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_cot_zeroshot_yaml
task: gpqa_diamond_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_cot_zeroshot_yaml
task: gpqa_extended_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_cot_zeroshot_yaml
task: gpqa_main_cot_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "generative_n_shot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_generative_n_shot_yaml
task: gpqa_diamond_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_generative_n_shot_yaml
task: gpqa_extended_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_generative_n_shot_yaml
task: gpqa_main_generative_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]

    for task in tqdm(subset):
        file_name = f"gpqa_{task}_n_shot.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": "_gpqa_n_shot_yaml",
                        "task": f"gpqa_{task}_n_shot",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/_gpqa_n_shot_yaml:
--------------------------------------------------------------------------------
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: multiple_choice
process_docs: !function utils.process_docs
training_split: train
# Because huggingface dataset only has train split
validation_split: train
test_split: null
description: "Here are some example questions from experts. Answer the final question yourself, following the format of the previous questions exactly.\n"
doc_to_text: "Question: {{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nAnswer:"
doc_to_target: answer
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_n_shot_yaml
task: gpqa_diamond_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_n_shot_yaml
task: gpqa_extended_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_n_shot_yaml
task: gpqa_main_n_shot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/_generate_configs.py:
--------------------------------------------------------------------------------
import yaml
from tqdm import tqdm


def main() -> None:
    subset = ["extended", "diamond", "main"]
    setting = "zeroshot"
    for task in tqdm(subset):
        file_name = f"gpqa_{task}_{setting}.yaml"
        try:
            with open(f"{file_name}", "w") as f:
                f.write("# Generated by _generate_configs.py\n")
                yaml.dump(
                    {
                        "include": f"_gpqa_{setting}_yaml",
                        "task": f"gpqa_{task}_{setting}",
                        "dataset_name": f"gpqa_{task}",
                    },
                    f,
                )
        except FileExistsError:
            pass


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/_gpqa_zeroshot_yaml:
--------------------------------------------------------------------------------
dataset_path: Idavidrein/gpqa
tag: gpqa
output_type: multiple_choice
process_docs: !function utils.process_docs
training_split: train
# Because huggingface dataset only has train split
validation_split: train
test_split: null
doc_to_text: "What is the correct answer to this question:{{Question}}\nChoices:\n(A) {{choice1}}\n(B) {{choice2}}\n(C) {{choice3}}\n(D) {{choice4}}\nAnswer:"
doc_to_target: answer
doc_to_choice: ["(A)", "(B)", "(C)", "(D)"]
num_fewshot: 0
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_diamond
include: _gpqa_zeroshot_yaml
task: gpqa_diamond_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_extended
include: _gpqa_zeroshot_yaml
task: gpqa_extended_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml:
--------------------------------------------------------------------------------
# Generated by _generate_configs.py
dataset_name: gpqa_main
include: _gpqa_zeroshot_yaml
task: gpqa_main_zeroshot

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval.yaml:
--------------------------------------------------------------------------------
task: humaneval
dataset_path: openai/openai_humaneval
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "{{prompt}}"
doc_to_target: "{{test}}\ncheck({{entry_point}})"
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: false
repeats: 1
num_fewshot: 0
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_5
repeats: 5
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [1,2,3,4,5]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: true
  temperature: 0.2
  top_p: 0.95

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval_5.yaml
task: humaneval_5_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_5_instruct_noprefix.yaml:
--------------------------------------------------------------------------------
include: humaneval_5.yaml
task: humaneval_5_instruct_noprefix
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
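# (note) `gen_prefix` is a key added in this fork (upstream lm-eval has no such
# field); it appears to seed the assistant response so that decoding starts
# inside a ```python code block.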
gen_prefix: "```python\n"
generation_kwargs:
  until:
    - "\nassert"
    - "\n# Test"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_64.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_64
repeats: 64
metric_list:
  - metric: !function utils.pass_at_k
    aggregation: mean
    higher_is_better: true
    k: [2,8,16,32,64]
generation_kwargs:
  until:
    - "\nclass"
    - "\ndef"
    - "\n#"
    - "\nif"
    - "\nprint"
  max_gen_toks: 1024
  do_sample: true
  temperature: 0.2
  top_p: 0.95

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_64_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval_64.yaml
task: humaneval_64_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_instruct.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_instruct
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}"
gen_prefix: "Here is the completed function:\n```python\n{{prompt}}\n"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_instruct_noprefix.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_instruct_noprefix
doc_to_text: "Write a solution to the following problem and make sure that it passes the tests:\n```{{prompt}}```"
gen_prefix: "```python\n"
generation_kwargs:
  until:
    - "\nassert"
    - "\n# Test"
filter_list:
  - name: "create_test"
    filter:
      - function: "custom"
        filter_fn: !function utils.build_predictions_instruct
metadata:
  version: 2.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/humaneval/humaneval_plus.yaml:
--------------------------------------------------------------------------------
include: humaneval.yaml
task: humaneval_plus
dataset_path: evalplus/humanevalplus

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/ifeval/ifeval.yaml:
--------------------------------------------------------------------------------
task: ifeval
dataset_path: google/IFEval
dataset_name: null
output_type: generate_until
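# (note) google/IFEval ships only a "train" split on the Hugging Face Hub,
# which is why it doubles as the test split below.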
test_split: train
num_fewshot: 0
doc_to_text: prompt
doc_to_target: 0
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0.0
  max_gen_toks: 1280
process_results: !function utils.process_results
metric_list:
  - metric: prompt_level_strict_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_strict_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true
  - metric: prompt_level_loose_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_loose_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true
metadata:
  version: 4.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp.yaml:
--------------------------------------------------------------------------------
task: mbpp
dataset_path: google-research-datasets/mbpp
dataset_name: full
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
target_delimiter: ""
metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "[DONE]"
  do_sample: false
num_fewshot: 3
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp_instruct.yaml:
--------------------------------------------------------------------------------
task: mbpp_instruct
dataset_path: google-research-datasets/mbpp
dataset_name: full
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
gen_prefix: "Here is the completed function:\n```python\n"
target_delimiter: ""
metric_list:
  - metric: !function utils.pass_at_1
    aggregation: mean
    higher_is_better: true
generation_kwargs:
  until:
    - "```"
  do_sample: false
num_fewshot: 0
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples
metadata:
  version: 1.0

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mbpp/mbpp_plus.yaml:
--------------------------------------------------------------------------------
include: mbpp.yaml
task: mbpp_plus
dataset_path: evalplus/mbppplus
dataset_name: null
doc_to_text: "You are an expert Python programmer, and here is your task: {{prompt if prompt is defined else text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n"

--------------------------------------------------------------------------------
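The `utils.pass_at_1` / `utils.pass_at_k` metrics referenced by the MBPP and HumanEval configs above are conventionally computed with the unbiased pass@k estimator of Chen et al. (2021). A minimal sketch of that estimator (the repo's actual implementation lives in each task's `utils.py`, which is not included in this dump):
```python
from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k: probability that at least one of k samples drawn
    without replacement from n generations is correct, given that c of
    the n generations passed the unit tests."""
    if n - c < k:
        return 1.0  # every size-k draw must contain a correct sample
    return 1.0 - comb(n - c, k) / comb(n, k)

# e.g. humaneval_5 draws 5 samples per problem; with 2 passing, pass@1 = 0.4
assert abs(pass_at_k(5, 2, 1) - 0.4) < 1e-12
```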
/eval_instruct/lm_eval/tasks/mbpp/mbpp_plus_instruct.yaml:
--------------------------------------------------------------------------------
include: mbpp_instruct.yaml
task: mbpp_plus_instruct
dataset_path: evalplus/mbppplus
dataset_name: null
doc_to_text: "You are an expert Python programmer, and here is your task: {{prompt if prompt is defined else text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_algebra.yaml:
--------------------------------------------------------------------------------
tag:
  - math_word_problems
task: minerva_math_algebra
dataset_path: EleutherAI/hendrycks_math
process_docs: !function utils.process_docs
dataset_name: algebra
output_type: generate_until
training_split: train
test_split: test
doc_to_text: !function utils.doc_to_text
process_results: !function utils.process_results
doc_to_target: "{{answer if few_shot is undefined else solution}}"
generation_kwargs:
  until:
    - "Problem:"
  do_sample: false
  temperature: 0
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
  - metric: math_verify
    aggregation: mean
    higher_is_better: true
num_fewshot: 4
metadata:
  version: 2.0
dataset_kwargs:
  trust_remote_code: true
fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_counting_and_prob.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: counting_and_probability
task: minerva_math_counting_and_prob

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_geometry.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: geometry
task: minerva_math_geometry

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_intermediate_algebra.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: intermediate_algebra
task: minerva_math_intermediate_algebra

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_num_theory.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: number_theory
task: minerva_math_num_theory

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_prealgebra.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: prealgebra
task: minerva_math_prealgebra

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/minerva_math/minerva_math_precalc.yaml:
--------------------------------------------------------------------------------
include: minerva_math_algebra.yaml
dataset_name: precalculus
task: minerva_math_precalc

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/_continuation_template_yaml:
--------------------------------------------------------------------------------
dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split
output_type: multiple_choice
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n
doc_to_text: "Question: {{question.strip()}}\nAnswer:"
doc_to_choice: "{{choices}}"
doc_to_target: "{{answer}}"
metadata:
  version: 1.0
dataset_kwargs:
  trust_remote_code: true

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/_mmlu.yaml:
--------------------------------------------------------------------------------
group: mmlu_continuation
group_alias: mmlu (continuation)
task:
  - group: stem
    task:
      - mmlu_continuation_stem
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: other
    task:
      - mmlu_continuation_other
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: social sciences
    task:
      - mmlu_continuation_social_sciences
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
  - group: humanities
    task:
      - mmlu_continuation_humanities
    aggregate_metric_list:
      - metric: acc
        weight_by_size: True
aggregate_metric_list:
  - metric: acc
    weight_by_size: True
metadata:
  version: 2

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
"dataset_name": "abstract_algebra"
"description": "The following are questions (with answers) about abstract\
  \ algebra.\n\n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_abstract_algebra"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
"dataset_name": "anatomy"
"description": "The following are questions (with answers) about anatomy.\n\
  \n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_anatomy"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
"dataset_name": "astronomy"
"description": "The following are questions (with answers) about astronomy.\n\
  \n"
"tag": "mmlu_continuation_stem"
"include": "_continuation_template_yaml"
"task": "mmlu_continuation_astronomy"

--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "business_ethics"
"description": "The following are questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_formal_logic" 7 | -------------------------------------------------------------------------------- 
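Each subject file in this directory is a thin wrapper: it sets only `dataset_name`, the prompt `description`, and a grouping `tag`, and inherits everything else from `_continuation_template_yaml` via `include`. A minimal sketch of how such an include chain can be resolved, assuming a simple recursive shallow merge (an illustration of the pattern, not lm-eval's actual loader):

    import yaml

    def load_task_config(path):
        """Load a task YAML, resolving `include:` so the child's keys override the template's."""
        with open(path) as f:
            cfg = yaml.safe_load(f)
        if "include" in cfg:
            # e.g. "_continuation_template_yaml", resolved here relative to the working directory
            base = load_task_config(cfg.pop("include"))
            base.update(cfg)  # subject-level keys win over the shared template
            cfg = base
        return cfg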
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school government and politics.\n\n" 
4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_statistics" 7 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_logical_fallacies" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_scenarios" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": 
"mmlu_continuation_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split 2 | test_split: test 3 | fewshot_split: dev 4 | fewshot_config: 5 | sampler: first_n 6 | output_type: multiple_choice 7 | doc_to_text: 
"{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:" 8 | doc_to_choice: ["A", "B", "C", "D"] 9 | doc_to_target: answer 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | metadata: 15 | version: 1.0 16 | dataset_kwargs: 17 | trust_remote_code: true 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu 2 | task: 3 | - mmlu_stem 4 | - mmlu_other 5 | - mmlu_social_sciences 6 | - mmlu_humanities 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | metadata: 11 | version: 2 12 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_humanities.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_humanities 2 | group_alias: humanities 3 | task: 4 | - mmlu_humanities_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_other.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_other 2 | group_alias: other 3 | task: 4 | - mmlu_other_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_social_sciences 2 | group_alias: social sciences 3 | task: 4 | - mmlu_social_sciences_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/_mmlu_stem.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_stem 2 | group_alias: stem 3 | task: 4 | - mmlu_stem_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_abstract_algebra" 7 | "task_alias": "abstract_algebra" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_business_ethics" 7 | "task_alias": "business_ethics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_clinical_knowledge" 7 | "task_alias": "clinical_knowledge" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_chemistry" 7 | "task_alias": "college_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_computer_science" 7 | "task_alias": "college_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | 
"include": "_default_template_yaml" 6 | "task": "mmlu_college_mathematics" 7 | "task_alias": "college_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_medicine" 7 | "task_alias": "college_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_computer_security" 7 | "task_alias": "computer_security" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_conceptual_physics" 7 | "task_alias": "conceptual_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_electrical_engineering" 7 | "task_alias": "electrical_engineering" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_elementary_mathematics" 7 | "task_alias": "elementary_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_biology" 7 | "task_alias": "high_school_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_chemistry" 7 | "task_alias": "high_school_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_computer_science" 7 | "task_alias": "high_school_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 
4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_european_history" 7 | "task_alias": "high_school_european_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_geography" 7 | "task_alias": "high_school_geography" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_government_and_politics" 7 | "task_alias": "high_school_government_and_politics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_macroeconomics" 7 | "task_alias": "high_school_macroeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_mathematics" 7 | "task_alias": "high_school_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_microeconomics" 7 | "task_alias": "high_school_microeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": 
"mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_physics" 7 | "task_alias": "high_school_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_psychology" 7 | "task_alias": "high_school_psychology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_statistics" 7 | "task_alias": "high_school_statistics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_us_history" 7 | "task_alias": "high_school_us_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_world_history" 7 | "task_alias": "high_school_world_history" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_aging" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_sexuality" 7 | "task_alias": "human_sexuality" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_international_law" 7 | "task_alias": "international_law" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_jurisprudence" 7 | "task_alias": "jurisprudence" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_logical_fallacies" 7 | "task_alias": "logical_fallacies" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_machine_learning" 7 | "task_alias": "machine_learning" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_management" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_medical_genetics" 7 | 
"task_alias": "medical_genetics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_disputes" 7 | "task_alias": "moral_disputes" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_scenarios" 7 | "task_alias": "moral_scenarios" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": 
"mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_accounting" 7 | "task_alias": "professional_accounting" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_law" 7 | "task_alias": "professional_law" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_medicine" 7 | "task_alias": "professional_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_professional_psychology" 7 | "task_alias": "professional_psychology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_public_relations" 7 | "task_alias": "public_relations" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_security_studies" 7 | "task_alias": "security_studies" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_us_foreign_policy" 7 | "task_alias": "us_foreign_policy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/default/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_world_religions" 7 | "task_alias": "world_religions" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_fewshot/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_cot_fewshot 2 | group_alias: mmlu (flan style, fewshot cot) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_cot_fewshot_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_cot_fewshot_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_cot_fewshot_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_cot_fewshot_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - aggregation: mean 30 | metric: exact_match 31 | weight_by_size: True 32 | filter_list: get-answer 33 | metadata: 34 | version: 2 35 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_cot_zeroshot 2 | group_alias: mmlu (flan style, zeroshot cot) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_cot_zeroshot_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_cot_zeroshot_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_cot_zeroshot_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_cot_zeroshot_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | 
version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": 
"The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | 
"task": "mmlu_flan_cot_zeroshot_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- 
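
The `_mmlu.yaml` group files earlier in this tree set `weight_by_size: True` on each `aggregate_metric_list` entry. The intent is that a category score (and the overall MMLU score) is the mean of per-subject scores weighted by subject test-set size, rather than a plain average of subjects. A small sketch of that computation, with made-up placeholder counts and scores:

```python
# What `weight_by_size: True` denotes: size-weighted mean over subtasks.
def weighted_aggregate(results):
    """results: list of (metric_value, n_examples) pairs, one per subtask."""
    total = sum(n for _, n in results)
    return sum(v * n for v, n in results) / total

# Hypothetical (acc, test-set size) triples for three STEM subjects.
stem = [(0.42, 100), (0.55, 152), (0.31, 270)]
print(f"stem acc (size-weighted): {weighted_aggregate(stem):.4f}")
```

Without the weighting, small subjects (e.g. 100-question subsets) would count as much as large ones (e.g. professional_law's ~1,500 questions) in the aggregate.
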
/eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_us_history" 7 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | 
"description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_moral_scenarios" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | 
"dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_cot_zeroshot_humanities" 5 | "include": 
"_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_n_shot_generative 2 | group_alias: mmlu (flan style, generative) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_n_shot_generative_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_n_shot_generative_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_n_shot_generative_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_n_shot_generative_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | 
"description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_global_facts" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are 
multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ 
law.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_medical_genetics" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_moral_scenarios" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 
1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_accounting" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are 
multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_sociology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_social_sciences" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_other" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_n_shot_generative_humanities" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_flan_n_shot_loglikelihood 2 | group_alias: mmlu (flan style, loglikelihood) 3 | task: 4 | - group: stem 5 | task: 6 | - mmlu_flan_n_shot_loglikelihood_stem 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | - group: other 11 | task: 12 | - mmlu_flan_n_shot_loglikelihood_other 13 | aggregate_metric_list: 14 | - metric: acc 15 | weight_by_size: True 16 | - group: social sciences 17 | task: 18 | - mmlu_flan_n_shot_loglikelihood_social_sciences 19 | aggregate_metric_list: 20 | - metric: acc 21 | weight_by_size: True 22 | - group: humanities 23 | task: 24 | - mmlu_flan_n_shot_loglikelihood_humanities 25 | aggregate_metric_list: 26 | - metric: acc 27 | weight_by_size: True 28 | aggregate_metric_list: 29 | - metric: acc 30 | weight_by_size: True 31 | metadata: 32 | version: 2 33 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu_flan_loglikelihood_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split 2 | test_split: test 3 | fewshot_split: dev 4 | fewshot_config: 5 | sampler: first_n 6 | output_type: multiple_choice 7 | doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) 
{{choices[3]}}\nA:" 8 | doc_to_choice: ["(A)", "(B)", "(C)", "(D)"] 9 | doc_to_target: answer 10 | metric_list: 11 | - metric: acc 12 | aggregation: mean 13 | higher_is_better: true 14 | metadata: 15 | version: 2.0 16 | dataset_kwargs: 17 | trust_remote_code: true 18 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_abstract_algebra" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_anatomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_astronomy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_business_ethics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": 
"_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_college_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": 
"mmlu_flan_n_shot_loglikelihood_computer_security" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_conceptual_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_econometrics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_electrical_engineering" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_elementary_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_formal_logic" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_global_facts" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_biology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_chemistry" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_computer_science" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_european_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_geography" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": 
"mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_mathematics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_microeconomics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_physics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_psychology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_statistics" 2 | 
"description": "The following are multiple choice questions (with answers) about high\ 3 | \ school statistics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_statistics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_us_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school us history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_us_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_world_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school world history.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_high_school_world_history" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_human_aging" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_human_sexuality" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are multiple choice questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_international_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The 
following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_jurisprudence" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are multiple choice questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_logical_fallacies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are multiple choice questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_machine_learning" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_management" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_marketing" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are multiple choice questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_medical_genetics" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": 
"_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_miscellaneous" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_moral_disputes" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_moral_scenarios" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_nutrition" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_philosophy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_prehistory" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_accounting" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ accounting.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_accounting" 7 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_law" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_medicine" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_medicine" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_psychology" 2 | "description": "The following are multiple choice questions (with answers) about professional\ 3 | \ psychology.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_professional_psychology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are multiple choice questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_public_relations" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are multiple choice questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_security_studies" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_sociology" 7 | 
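The `_mmlu.yaml` group file earlier in this directory rolls the per-subject accuracies up through the four category subgroups with `weight_by_size: True`. A plausible reading of that flag, sketched here as an assumption rather than the harness's actual aggregation code, is a document-count-weighted mean:

```python
def aggregate_acc(subtask_results: list[dict]) -> float:
    # Average subtask accuracies weighted by the number of documents,
    # i.e. what `weight_by_size: True` is presumed to mean here.
    total = sum(r["size"] for r in subtask_results)
    return sum(r["acc"] * r["size"] for r in subtask_results) / total

# Two hypothetical subtasks: (0.8 * 100 + 0.5 * 300) / 400 = 0.575
print(aggregate_acc([{"acc": 0.8, "size": 100}, {"acc": 0.5, "size": 300}]))
```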
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_other" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_virology" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities" 5 | "include": "_mmlu_flan_loglikelihood_template_yaml" 6 | "task": "mmlu_flan_n_shot_loglikelihood_world_religions" 7 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are multiple choice questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_abstract_algebra_generative" 7 | "task_alias": "abstract_algebra" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy_generative" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy_generative" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 
| "description": "The following are multiple choice questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_business_ethics_generative" 7 | "task_alias": "business_ethics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "clinical_knowledge" 2 | "description": "The following are multiple choice questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_clinical_knowledge_generative" 7 | "task_alias": "clinical_knowledge" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology_generative" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_chemistry_generative" 7 | "task_alias": "college_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ computer science.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_computer_science_generative" 7 | "task_alias": "college_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_mathematics_generative" 7 | "task_alias": "college_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": 
"_default_template_yaml" 6 | "task": "mmlu_college_medicine_generative" 7 | "task_alias": "college_medicine" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics_generative" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are multiple choice questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_computer_security_generative" 7 | "task_alias": "computer_security" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are multiple choice questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_conceptual_physics_generative" 7 | "task_alias": "conceptual_physics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics_generative" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "electrical_engineering" 2 | "description": "The following are multiple choice questions (with answers) about electrical\ 3 | \ engineering.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_electrical_engineering_generative" 7 | "task_alias": "electrical_engineering" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "elementary_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about elementary\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_elementary_mathematics_generative" 7 | "task_alias": "elementary_mathematics" 8 | 
-------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic_generative" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts_generative" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_biology_generative" 7 | "task_alias": "high_school_biology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_chemistry" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school chemistry.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_chemistry_generative" 7 | "task_alias": "high_school_chemistry" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_computer_science" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school computer science.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_computer_science_generative" 7 | "task_alias": "high_school_computer_science" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_european_history" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school european history.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_european_history_generative" 7 | "task_alias": "high_school_european_history" 8 | -------------------------------------------------------------------------------- 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_geography" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school geography.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_geography_generative" 7 | "task_alias": "high_school_geography" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_government_and_politics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school government and politics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_government_and_politics_generative" 7 | "task_alias": "high_school_government_and_politics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_macroeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school macroeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_macroeconomics_generative" 7 | "task_alias": "high_school_macroeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_mathematics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school mathematics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_mathematics_generative" 7 | "task_alias": "high_school_mathematics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_microeconomics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school microeconomics.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_microeconomics_generative" 7 | "task_alias": "high_school_microeconomics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are multiple choice questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_high_school_physics_generative" 7 | "task_alias": "high_school_physics" 8 | 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_psychology"
"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_psychology_generative"
"task_alias": "high_school_psychology"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_statistics"
"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n"
"tag": "mmlu_stem_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_statistics_generative"
"task_alias": "high_school_statistics"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_us_history"
"description": "The following are multiple choice questions (with answers) about high school us history.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_us_history_generative"
"task_alias": "high_school_us_history"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml:
--------------------------------------------------------------------------------
"dataset_name": "high_school_world_history"
"description": "The following are multiple choice questions (with answers) about high school world history.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_high_school_world_history_generative"
"task_alias": "high_school_world_history"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
"dataset_name": "human_aging"
"description": "The following are multiple choice questions (with answers) about human aging.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_human_aging_generative"
"task_alias": "human_aging"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
"dataset_name": "human_sexuality"
"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_human_sexuality_generative"
"task_alias": "human_sexuality"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
"dataset_name": "international_law"
"description": "The following are multiple choice questions (with answers) about international law.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_international_law_generative"
"task_alias": "international_law"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
"dataset_name": "jurisprudence"
"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_jurisprudence_generative"
"task_alias": "jurisprudence"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
"dataset_name": "logical_fallacies"
"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_logical_fallacies_generative"
"task_alias": "logical_fallacies"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
"dataset_name": "machine_learning"
"description": "The following are multiple choice questions (with answers) about machine learning.\n\n"
"tag": "mmlu_stem_generative"
"include": "_default_template_yaml"
"task": "mmlu_machine_learning_generative"
"task_alias": "machine_learning"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
"dataset_name": "management"
"description": "The following are multiple choice questions (with answers) about management.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_management_generative"
"task_alias": "management"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
"dataset_name": "marketing"
"description": "The following are multiple choice questions (with answers) about marketing.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_marketing_generative"
"task_alias": "marketing"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
"dataset_name": "medical_genetics"
"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n"
genetics.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_medical_genetics_generative" 7 | "task_alias": "medical_genetics" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous_generative" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_disputes_generative" 7 | "task_alias": "moral_disputes" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are multiple choice questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_moral_scenarios_generative" 7 | "task_alias": "moral_scenarios" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition_generative" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy_generative" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory_generative" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml: 
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_accounting"
"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_accounting_generative"
"task_alias": "professional_accounting"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_law"
"description": "The following are multiple choice questions (with answers) about professional law.\n\n"
"tag": "mmlu_humanities_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_law_generative"
"task_alias": "professional_law"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_medicine"
"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n"
"tag": "mmlu_other_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_medicine_generative"
"task_alias": "professional_medicine"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "professional_psychology"
"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_professional_psychology_generative"
"task_alias": "professional_psychology"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
"dataset_name": "public_relations"
"description": "The following are multiple choice questions (with answers) about public relations.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_public_relations_generative"
"task_alias": "public_relations"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
"dataset_name": "security_studies"
"description": "The following are multiple choice questions (with answers) about security studies.\n\n"
"tag": "mmlu_social_sciences_generative"
"include": "_default_template_yaml"
"task": "mmlu_security_studies_generative"
"task_alias": "security_studies"
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
"dataset_name": "sociology"
"description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology_generative" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are multiple choice questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_us_foreign_policy_generative" 7 | "task_alias": "us_foreign_policy" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology_generative" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu/generative/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are multiple choice questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_world_religions_generative" 7 | "task_alias": "world_religions" 8 | -------------------------------------------------------------------------------- /eval_instruct/lm_eval/tasks/mmlu_pro/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: TIGER-Lab/MMLU-Pro 2 | test_split: test 3 | fewshot_split: validation 4 | fewshot_config: 5 | sampler: first_n 6 | doc_to_text: !function utils.fewshot_to_text 7 | doc_to_target: "" 8 | output_type: generate_until 9 | doc_to_text: !function utils.doc_to_text 10 | doc_to_target: answer 11 | filter_list: 12 | - name: "custom-extract" 13 | filter: 14 | - function: "regex" 15 | regex_pattern: 'answer is \(?([ABCDEFGHIJ])\)?' 
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/_mmlu_pro.yaml:
--------------------------------------------------------------------------------
group: mmlu_pro
task:
  - mmlu_pro_biology
  - mmlu_pro_business
  - mmlu_pro_chemistry
  - mmlu_pro_computer_science
  - mmlu_pro_economics
  - mmlu_pro_engineering
  - mmlu_pro_health
  - mmlu_pro_history
  - mmlu_pro_law
  - mmlu_pro_math
  - mmlu_pro_other
  - mmlu_pro_philosophy
  - mmlu_pro_physics
  - mmlu_pro_psychology
aggregate_metric_list:
  - aggregation: mean
    metric: exact_match
    weight_by_size: true
    filter_list: custom-extract
metadata:
  version: 2.0
--------------------------------------------------------------------------------
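With the group file in place, the whole 14-subtask suite can be launched under the single name mmlu_pro. A sketch using the harness's Python entry point; the model id is a placeholder, and this repo's own diffllm backend (lm_eval/models/diffllm.py) may be the intended model type rather than plain "hf":

import lm_eval

# Placeholder checkpoint; swap in the model you are actually evaluating.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=Dream-org/Dream-v0-Instruct-7B",
    tasks=["mmlu_pro"],  # expands to all 14 subtasks via the group file above
)
print(results["results"])  # per-task and aggregated exact_match scores
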
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about biology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_biology"
task_alias: "biology"
process_docs: !function utils.process_biology
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_business.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about business. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_business"
task_alias: "business"
process_docs: !function utils.process_business
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about chemistry. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_chemistry"
task_alias: "chemistry"
process_docs: !function utils.process_chemistry
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about computer science. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_computer_science"
task_alias: "computer_science"
process_docs: !function utils.process_computer_science
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about economics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_economics"
task_alias: "economics"
process_docs: !function utils.process_economics
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about engineering. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_engineering"
task_alias: "engineering"
process_docs: !function utils.process_engineering
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_health.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about health. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_health"
task_alias: "health"
process_docs: !function utils.process_health
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_history.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about history. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_history"
task_alias: "history"
process_docs: !function utils.process_history
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_law.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_law"
task_alias: "law"
process_docs: !function utils.process_law
--------------------------------------------------------------------------------
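Each subject file binds process_docs to a utils.process_* hook. utils.py itself is not reproduced in this listing, but since MMLU-Pro ships as one dataset with a per-row category column, a plausible sketch of what such hooks do is a per-subject filter (hypothetical code, assuming a Hugging Face datasets.Dataset input):

from functools import partial

def _filter_by_category(dataset, category):
    # Hypothetical: keep only rows whose "category" column matches the subject.
    return dataset.filter(lambda doc: doc["category"] == category)

# One hook per subject, e.g. the process_law hook bound just above:
process_law = partial(_filter_by_category, category="law")
process_math = partial(_filter_by_category, category="math")
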
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_math.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about math. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_math"
task_alias: "math"
process_docs: !function utils.process_math
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_other.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_other"
task_alias: "other"
process_docs: !function utils.process_other
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about philosophy. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_philosophy"
task_alias: "philosophy"
process_docs: !function utils.process_philosophy
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about physics. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_physics"
task_alias: "physics"
process_docs: !function utils.process_physics
--------------------------------------------------------------------------------
/eval_instruct/lm_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml:
--------------------------------------------------------------------------------
description: "The following are multiple choice questions (with answers) about psychology. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.\n"
include: "_default_template_yaml"
task: "mmlu_pro_psychology"
task_alias: "psychology"
process_docs: !function utils.process_psychology
--------------------------------------------------------------------------------
/eval_instruct/requirements.txt:
--------------------------------------------------------------------------------
-e .
--------------------------------------------------------------------------------
/eval_instruct/setup.py:
--------------------------------------------------------------------------------
import setuptools


# This is to make sure that the package supports editable installs
setuptools.setup()
--------------------------------------------------------------------------------
/imgs/example_gradio.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HKUNLP/Dream/e244fb6804dc8884bba01a3dfec69ff5dcb64d4e/imgs/example_gradio.gif
--------------------------------------------------------------------------------