├── tmp
    └── .gitkeep
├── lmms_eval
    ├── __init__.py
    ├── api
    │   ├── __init__.py
    │   └── __pycache__
    │   │   ├── task.cpython-310.pyc
    │   │   ├── filter.cpython-310.pyc
    │   │   ├── group.cpython-310.pyc
    │   │   ├── metrics.cpython-310.pyc
    │   │   ├── model.cpython-310.pyc
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── instance.cpython-310.pyc
    │   │   ├── registry.cpython-310.pyc
    │   │   └── samplers.cpython-310.pyc
    ├── caching
    │   ├── __init__.py
    │   └── __pycache__
    │   │   ├── cache.cpython-310.pyc
    │   │   └── __init__.cpython-310.pyc
    ├── models
    │   ├── model_utils
    │   │   └── __init__.py
    │   ├── video_chatgpt
    │   │   ├── eval
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── model
    │   │   │   └── __init__.py
    │   │   ├── constants.py
    │   │   └── utils.py
    │   └── __pycache__
    │   │   ├── llava.cpython-310.pyc
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── flash_hd.cpython-310.pyc
    │   │   ├── imp_llava.cpython-310.pyc
    │   │   ├── llava_hr.cpython-310.pyc
    │   │   ├── FlashSloth.cpython-310.pyc
    │   │   └── FlashSloth_HD.cpython-310.pyc
    ├── tasks
    │   ├── _task_utils
    │   │   ├── gpt_eval_utils.py
    │   │   ├── __pycache__
    │   │   │   ├── file_utils.cpython-310.pyc
    │   │   │   └── vqa_eval_metric.cpython-310.pyc
    │   │   └── file_utils.py
    │   ├── multilingual-llava-bench-in-the-wild
    │   │   ├── README.md
    │   │   ├── urdu_llava_in_the_wild.yaml
    │   │   ├── arabic_llava_in_the_wild.yaml
    │   │   ├── french_llava_in_the_wild.yaml
    │   │   ├── hindi_llava_in_the_wild.yaml
    │   │   ├── spanish_llava_in_the_wild.yaml
    │   │   ├── bengali_llava_in_the_wild.yaml
    │   │   ├── chinese_llava_in_the_wild.yaml
    │   │   ├── russian_llava_in_the_wild.yaml
    │   │   └── japanese_llava_in_the_wild.yaml
    │   ├── mmt
    │   │   ├── mmt.yaml
    │   │   ├── mmt_mi.yaml
    │   │   └── _default_template_yaml
    │   ├── ok_vqa
    │   │   ├── _ok_vqa.yaml
    │   │   ├── ok_vqa_val2014.yaml
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── flickr30k
    │   │   └── flickr30k.yaml
    │   ├── mmmu
    │   │   ├── mmmu.yaml
    │   │   ├── arial.ttf
    │   │   ├── mmmu_group_img.yaml
    │   │   ├── _default_template_yaml
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── vqav2
    │   │   ├── _vqav2.yaml
    │   │   ├── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   │   ├── vqav2_test.yaml
    │   │   └── vqav2_val.yaml
    │   ├── cmmmu
    │   │   ├── _cmmmu.yaml
    │   │   └── _default_template_cmmmu_yaml
    │   ├── docvqa
    │   │   ├── docvqa.yaml
    │   │   ├── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   │   ├── docvqa_val.yaml
    │   │   └── docvqa_test.yaml
    │   ├── vatex
    │   │   └── _vatex.yaml
    │   ├── iconqa
    │   │   ├── iconqa.yaml
    │   │   ├── iconqa_test.yaml
    │   │   └── iconqa_val.yaml
    │   ├── mmsearch
    │   │   └── retrieve_content
    │   │   │   └── tokenization
    │   │   │       └── __init__.py
    │   ├── nocaps
    │   │   ├── nocaps.yaml
    │   │   └── _default_template_nocaps_yaml
    │   ├── textvqa
    │   │   ├── _textvqa.yaml
    │   │   ├── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   │   └── textvqa_test.yaml
    │   ├── websrc
    │   │   └── websrc.yaml
    │   ├── infovqa
    │   │   ├── infovqa.yaml
    │   │   ├── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   │   ├── infovqa_val.yaml
    │   │   └── infovqa_test.yaml
    │   ├── synthdog
    │   │   └── synthdog.yaml
    │   ├── textcaps
    │   │   ├── textcaps.yaml
    │   │   └── _default_template_textcaps_yaml
    │   ├── worldqa
    │   │   ├── worldqa.yaml
    │   │   └── _default_template_yaml
    │   ├── vizwiz_vqa
    │   │   ├── _vizwiz_vqa.yaml
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── pope
    │   │   ├── pope_full.yaml
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── scienceqa
    │   │   ├── scienceqa_full.yaml
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── arc
    │   │   └── arc_challenge.yaml
    │   ├── coco_cap
    │   │   ├── coco2014_cap.yaml
    │   │   ├── coco2017_cap.yaml
    │   │   └── coco_cap.yaml
    │   ├── multidocvqa
    │   │   └── multidocvqa.yaml
    │   ├── screenspot
    │   │   ├── _screenspot.yaml
    │   │   ├── screenspot_rec_test.yaml
    │   │   └── screenspot_reg_test.yaml
    │   ├── nextqa
    │   │   ├── nextqa.yaml
    │   │   └── _default_template_yaml
    │   ├── qbench
    │   │   └── qbenchs_dev.yaml
    │   ├── wild_vision_bench
    │   │   ├── wildvision_bench.yaml
    │   │   ├── wild_vision_bench0617.yaml
    │   │   └── wild_vision_bench0630.yaml
    │   ├── internal_eval
    │   │   ├── internal_eval.yaml
    │   │   └── _default_template_internal_eval_yaml
    │   ├── live_bench
    │   │   ├── live_bench_2406.yaml
    │   │   ├── live_bench_2407.yaml
    │   │   ├── live_bench_2409.yaml
    │   │   └── live_bench.yaml
    │   ├── jmmmu
    │   │   ├── jmmmu_math.yaml
    │   │   ├── jmmmu_music.yaml
    │   │   ├── jmmmu_biology.yaml
    │   │   ├── jmmmu_design.yaml
    │   │   ├── jmmmu_manage.yaml
    │   │   ├── jmmmu_physics.yaml
    │   │   ├── jmmmu_chemistry.yaml
    │   │   ├── jmmmu_economics.yaml
    │   │   ├── jmmmu_finance.yaml
    │   │   ├── jmmmu_marketing.yaml
    │   │   ├── jmmmu_materials.yaml
    │   │   ├── jmmmu_pharmacy.yaml
    │   │   ├── jmmmu_accounting.yaml
    │   │   ├── jmmmu_psychology.yaml
    │   │   ├── jmmmu_agriculture.yaml
    │   │   ├── jmmmu_electronics.yaml
    │   │   ├── jmmmu_japanese_art.yaml
    │   │   ├── jmmmu_public_health.yaml
    │   │   ├── jmmmu_world_history.yaml
    │   │   ├── jmmmu_clinical_medicine.yaml
    │   │   ├── jmmmu_computer_science.yaml
    │   │   ├── jmmmu_energy_and_power.yaml
    │   │   ├── jmmmu_japanese_heritage.yaml
    │   │   ├── jmmmu_japanese_history.yaml
    │   │   ├── jmmmu_basic_medical_science.yaml
    │   │   ├── jmmmu_mechanical_engineering.yaml
    │   │   ├── jmmmu_architecture_and_engineering.yaml
    │   │   └── jmmmu_diagnostics_and_laboratory_medicine.yaml
    │   ├── detailcaps
    │   │   └── _default_template_detailcaps_yaml
    │   ├── refcoco
    │   │   ├── refcoco_seg_val.yaml
    │   │   ├── refcoco_bbox_val.yaml
    │   │   ├── refcoco_seg_test.yaml
    │   │   ├── refcoco_bbox_test.yaml
    │   │   ├── refcoco_bbox_testA.yaml
    │   │   ├── refcoco_bbox_testB.yaml
    │   │   ├── refcoco_seg_testA.yaml
    │   │   ├── refcoco_seg_testB.yaml
    │   │   ├── refcoco_bbox_rec_val.yaml
    │   │   ├── refcoco_bbox_rec_test.yaml
    │   │   ├── refcoco_bbox_rec_testA.yaml
    │   │   ├── refcoco_bbox_rec_testB.yaml
    │   │   └── _refcoco.yaml
    │   ├── refcoco+
    │   │   ├── refcoco+_seg_val.yaml
    │   │   ├── refcoco+_bbox_val.yaml
    │   │   ├── refcoco+_seg_testA.yaml
    │   │   ├── refcoco+_seg_testB.yaml
    │   │   ├── refcoco+_bbox_testA.yaml
    │   │   ├── refcoco+_bbox_testB.yaml
    │   │   ├── refcoco+_bbox_rec_val.yaml
    │   │   ├── refcoco+_bbox_rec_testA.yaml
    │   │   ├── refcoco+_bbox_rec_testB.yaml
    │   │   └── _refcoco.yaml
    │   ├── refcocog
    │   │   ├── refcocog_seg_val.yaml
    │   │   ├── refcocog_bbox_val.yaml
    │   │   ├── refcocog_seg_test.yaml
    │   │   ├── _refcoco.yaml
    │   │   ├── refcocog_bbox_test.yaml
    │   │   ├── refcocog_bbox_rec_val.yaml
    │   │   └── refcocog_bbox_rec_test.yaml
    │   ├── __pycache__
    │   │   └── __init__.cpython-310.pyc
    │   ├── ai2d
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── gqa
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── mme
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── mmmu_pro
    │   │   ├── _default_template_yaml
    │   │   ├── mmmu_pro_cot.yaml
    │   │   └── mmmu_pro.yaml
    │   ├── mmvet
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── chartqa
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── mathvista
    │   │   ├── __pycache__
    │   │   │   ├── utils.cpython-310.pyc
    │   │   │   └── mathvista_evals.cpython-310.pyc
    │   │   ├── mathvista.yaml
    │   │   └── mathvista_testmini.yaml
    │   ├── ocrbench
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── seedbench
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── videochatgpt
    │   │   ├── _videochatgpt.yaml
    │   │   └── _default_template_yaml
    │   ├── youcook2
    │   │   └── _default_template_yaml
    │   ├── gpqa
    │   │   ├── n_shot
    │   │   │   ├── gpqa_main_n_shot.yaml
    │   │   │   ├── gpqa_diamond_n_shot.yaml
    │   │   │   └── gpqa_extended_n_shot.yaml
    │   │   ├── zeroshot
    │   │   │   ├── gpqa_main_zeroshot.yaml
    │   │   │   ├── gpqa_diamond_zeroshot.yaml
    │   │   │   └── gpqa_extended_zeroshot.yaml
    │   │   ├── cot_n_shot
    │   │   │   ├── gpqa_main_cot_n_shot.yaml
    │   │   │   ├── gpqa_diamond_cot_n_shot.yaml
    │   │   │   └── gpqa_extended_cot_n_shot.yaml
    │   │   ├── cot_zeroshot
    │   │   │   ├── gpqa_main_cot_zeroshot.yaml
    │   │   │   ├── gpqa_diamond_cot_zeroshot.yaml
    │   │   │   └── gpqa_extended_cot_zeroshot.yaml
    │   │   └── generative
    │   │   │   ├── gpqa_main_generative_n_shot.yaml
    │   │   │   ├── gpqa_diamond_generative_n_shot.yaml
    │   │   │   └── gpqa_extended_generative_n_shot.yaml
    │   ├── mmbench
    │   │   ├── __pycache__
    │   │   │   ├── cc_utils.cpython-310.pyc
    │   │   │   ├── cn_utils.cpython-310.pyc
    │   │   │   ├── en_utils.cpython-310.pyc
    │   │   │   └── mmbench_evals.cpython-310.pyc
    │   │   ├── mmbench_cn.yaml
    │   │   ├── mmbench_en.yaml
    │   │   ├── mmbench_cn_test.yaml
    │   │   ├── mmbench_en_test.yaml
    │   │   └── mmbench.yaml
    │   ├── olympiadbench
    │   │   └── olympiadbench.yaml
    │   ├── realworldqa
    │   │   └── __pycache__
    │   │   │   └── utils.cpython-310.pyc
    │   ├── hallusion_bench
    │   │   └── __pycache__
    │   │   │   ├── utils.cpython-310.pyc
    │   │   │   └── evaluate_hb.cpython-310.pyc
    │   ├── mix_evals
    │   │   └── mix_evals_video2text.yaml
    │   ├── tempcompass
    │   │   └── _tempcompass.yaml
    │   ├── llava_interleave_bench
    │   │   ├── interleave_bench.yaml
    │   │   └── _default_template_interleave_yaml
    │   ├── vitatecs
    │   │   ├── _vitatecs.yaml
    │   │   └── _default_template_yaml
    │   ├── mmlu
    │   │   ├── default
    │   │   │   ├── _mmlu_stem.yaml
    │   │   │   ├── _mmlu_other.yaml
    │   │   │   ├── _mmlu_humanities.yaml
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── _mmlu_social_sciences.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   └── mmlu_high_school_psychology.yaml
    │   │   ├── continuation
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   └── mmlu_high_school_european_history.yaml
    │   │   ├── generative
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   └── mmlu_us_foreign_policy.yaml
    │   │   ├── flan_cot_zeroshot
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   └── mmlu_high_school_statistics.yaml
    │   │   └── flan_n_shot
    │   │   │   ├── generative
    │   │   │       ├── mmlu_anatomy.yaml
    │   │   │       ├── mmlu_virology.yaml
    │   │   │       ├── mmlu_astronomy.yaml
    │   │   │       ├── mmlu_marketing.yaml
    │   │   │       ├── mmlu_nutrition.yaml
    │   │   │       ├── mmlu_management.yaml
    │   │   │       ├── mmlu_human_aging.yaml
    │   │   │       ├── mmlu_philosophy.yaml
    │   │   │       ├── mmlu_prehistory.yaml
    │   │   │       ├── mmlu_sociology.yaml
    │   │   │       ├── mmlu_global_facts.yaml
    │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │       ├── mmlu_econometrics.yaml
    │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │       ├── mmlu_jurisprudence.yaml
    │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │       ├── mmlu_college_biology.yaml
    │   │   │       ├── mmlu_college_physics.yaml
    │   │   │       ├── mmlu_moral_disputes.yaml
    │   │   │       ├── mmlu_abstract_algebra.yaml
    │   │   │       ├── mmlu_college_chemistry.yaml
    │   │   │       ├── mmlu_college_medicine.yaml
    │   │   │       ├── mmlu_computer_security.yaml
    │   │   │       ├── mmlu_machine_learning.yaml
    │   │   │       ├── mmlu_medical_genetics.yaml
    │   │   │       ├── mmlu_moral_scenarios.yaml
    │   │   │       ├── mmlu_world_religions.yaml
    │   │   │       ├── mmlu_clinical_knowledge.yaml
    │   │   │       ├── mmlu_conceptual_physics.yaml
    │   │   │       ├── mmlu_human_sexuality.yaml
    │   │   │       ├── mmlu_professional_law.yaml
    │   │   │       ├── mmlu_college_mathematics.yaml
    │   │   │       ├── mmlu_high_school_biology.yaml
    │   │   │       ├── mmlu_high_school_physics.yaml
    │   │   │       └── mmlu_international_law.yaml
    │   │   │   └── loglikelihood
    │   │   │       ├── mmlu_anatomy.yaml
    │   │   │       ├── mmlu_astronomy.yaml
    │   │   │       ├── mmlu_marketing.yaml
    │   │   │       ├── mmlu_nutrition.yaml
    │   │   │       ├── mmlu_virology.yaml
    │   │   │       ├── mmlu_management.yaml
    │   │   │       ├── mmlu_human_aging.yaml
    │   │   │       ├── mmlu_philosophy.yaml
    │   │   │       ├── mmlu_prehistory.yaml
    │   │   │       ├── mmlu_sociology.yaml
    │   │   │       ├── mmlu_global_facts.yaml
    │   │   │       ├── mmlu_formal_logic.yaml
    │   │   │       ├── mmlu_miscellaneous.yaml
    │   │   │       ├── mmlu_business_ethics.yaml
    │   │   │       ├── mmlu_college_biology.yaml
    │   │   │       ├── mmlu_college_physics.yaml
    │   │   │       ├── mmlu_econometrics.yaml
    │   │   │       └── mmlu_jurisprudence.yaml
    │   ├── mmupd
    │   │   ├── mmupd_option.yaml
    │   │   ├── mmupd_base.yaml
    │   │   ├── mmupd_instruction.yaml
    │   │   ├── mmupd.yaml
    │   │   ├── mmaad_base.yaml
    │   │   ├── mmiasd_base.yaml
    │   │   └── mmivqd_base.yaml
    │   ├── egoschema
    │   │   └── _default_template_yaml
    │   ├── perceptiontest
    │   │   ├── test
    │   │   │   └── _default_template_yaml
    │   │   └── val
    │   │   │   └── _default_template_yaml
    │   ├── mvbench
    │   │   ├── mvbench_action_count.yaml
    │   │   ├── mvbench_moving_count.yaml
    │   │   ├── mvbench_state_change.yaml
    │   │   ├── mvbench_action_antonym.yaml
    │   │   ├── mvbench_object_shuffle.yaml
    │   │   ├── mvbench_action_sequence.yaml
    │   │   ├── mvbench_character_order.yaml
    │   │   ├── mvbench_action_prediction.yaml
    │   │   ├── mvbench_fine_grained_pose.yaml
    │   │   ├── mvbench_moving_attribute.yaml
    │   │   ├── mvbench_moving_direction.yaml
    │   │   ├── mvbench_object_existence.yaml
    │   │   ├── mvbench_scene_transition.yaml
    │   │   ├── mvbench_unexpected_action.yaml
    │   │   ├── mvbench_object_interaction.yaml
    │   │   ├── mvbench_action_localization.yaml
    │   │   ├── mvbench_fine_grained_action.yaml
    │   │   ├── mvbench_egocentric_navigation.yaml
    │   │   └── mvbench_counterfactual_inference.yaml
    │   ├── cvrr
    │   │   └── _default_template_yaml
    │   ├── mathverse
    │   │   ├── mathverse_testmini_text.yaml
    │   │   └── mathverse_testmini_vision.yaml
    │   ├── mmlu_pro
    │   │   ├── mmlu_pro_law.yaml
    │   │   ├── mmlu_pro_math.yaml
    │   │   ├── mmlu_pro_other.yaml
    │   │   └── mmlu_pro_health.yaml
    │   └── video_detail_description
    │   │   └── _default_template_yaml
    ├── loggers
    │   ├── __init__.py
    │   └── __pycache__
    │   │   ├── utils.cpython-310.pyc
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── wandb_logger.cpython-310.pyc
    │   │   └── evaluation_tracker.cpython-310.pyc
    ├── __pycache__
    │   ├── utils.cpython-310.pyc
    │   ├── __init__.cpython-310.pyc
    │   ├── __main__.cpython-310.pyc
    │   ├── evaluator.cpython-310.pyc
    │   └── evaluator_utils.cpython-310.pyc
    └── filters
    │   └── __pycache__
    │       ├── __init__.cpython-310.pyc
    │       ├── extraction.cpython-310.pyc
    │       ├── selection.cpython-310.pyc
    │       └── transformation.cpython-310.pyc
├── images
    ├── e1.png
    ├── e2.png
    ├── e3.png
    ├── e4.jpg
    ├── main.png
    ├── example1.png
    ├── example3.png
    ├── 1733320415394.jpg
    ├── 1733397891872.jpg
    ├── performance.png
    └── flashsloth_yellow.png
├── all.sh
├── flashsloth
    ├── model
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── fusion.cpython-310.pyc
    │   │   ├── ldpnet.cpython-310.pyc
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── builder.cpython-310.pyc
    │   │   ├── lqformer.cpython-310.pyc
    │   │   ├── pooling.cpython-310.pyc
    │   │   ├── llava_arch.cpython-310.pyc
    │   │   ├── lqformer_new.cpython-310.pyc
    │   │   └── learnable_token.cpython-310.pyc
    │   ├── language_model
    │   │   ├── __pycache__
    │   │   │   └── flashsloth.cpython-310.pyc
    │   │   └── phi2
    │   │   │   └── __pycache__
    │   │   │       ├── modeling_phi.cpython-310.pyc
    │   │   │       └── configuration_phi.cpython-310.pyc
    │   ├── multimodal_encoder
    │   │   ├── __pycache__
    │   │   │   ├── builder.cpython-310.pyc
    │   │   │   └── clip_encoder.cpython-310.pyc
    │   │   └── siglip
    │   │   │   └── __pycache__
    │   │   │       ├── modeling_siglip.cpython-310.pyc
    │   │   │       ├── configuration_siglip.cpython-310.pyc
    │   │   │       ├── image_processing_imp.cpython-310.pyc
    │   │   │       └── image_processing_flashsloth.cpython-310.pyc
    │   └── multimodal_projector
    │   │   └── __pycache__
    │   │       └── builder.cpython-310.pyc
    ├── eval
    │   ├── webpage
    │   │   └── figures
    │   │   │   ├── bard.jpg
    │   │   │   ├── llama.jpg
    │   │   │   ├── alpaca.png
    │   │   │   └── vicuna.jpeg
    │   └── __pycache__
    │   │   ├── eval_textvqa.cpython-310.pyc
    │   │   ├── m4c_evaluator.cpython-310.pyc
    │   │   ├── model_vqa_loader.cpython-310.pyc
    │   │   ├── model_vqa_mmbench.cpython-310.pyc
    │   │   └── model_vqa_science.cpython-310.pyc
    ├── __pycache__
    │   ├── utils.cpython-310.pyc
    │   ├── __init__.cpython-310.pyc
    │   ├── constants.cpython-310.pyc
    │   ├── mm_utils.cpython-310.pyc
    │   └── conversation.cpython-310.pyc
    └── train
    │   └── __pycache__
    │       ├── save.cpython-310.pyc
    │       ├── align.cpython-310.pyc
    │       ├── train.cpython-310.pyc
    │       ├── finetune.cpython-310.pyc
    │       ├── pretrain.cpython-310.pyc
    │       ├── finetune_hd.cpython-310.pyc
    │       └── llava_trainer.cpython-310.pyc
└── requirements.txt


/tmp/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/api/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/caching/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/models/model_utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/eval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/gpt_eval_utils.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/mmt.yaml:
--------------------------------------------------------------------------------
1 | group: mmt
2 | task:
3 | - mmt_val
4 | - mmt_test


--------------------------------------------------------------------------------
/images/e1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/e1.png


--------------------------------------------------------------------------------
/images/e2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/e2.png


--------------------------------------------------------------------------------
/images/e3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/e3.png


--------------------------------------------------------------------------------
/images/e4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/e4.jpg


--------------------------------------------------------------------------------
/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: ok_vqa
2 | task:
3 | - ok_vqa_val2014


--------------------------------------------------------------------------------
/all.sh:
--------------------------------------------------------------------------------
1 | bash scripts/save.sh 
2 | bash scripts/pretrain.sh 
3 | bash scripts/finetune.sh


--------------------------------------------------------------------------------
/images/main.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/main.png


--------------------------------------------------------------------------------
/lmms_eval/tasks/flickr30k/flickr30k.yaml:
--------------------------------------------------------------------------------
1 | group: flickr30k
2 | task:
3 | - flickr30k_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/mmmu.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu
2 | task:
3 | - mmmu_val
4 | - mmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vqav2/_vqav2.yaml:
--------------------------------------------------------------------------------
1 | group: vqav2
2 | task:
3 | - vqav2_val
4 | - vqav2_test


--------------------------------------------------------------------------------
/images/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/example1.png


--------------------------------------------------------------------------------
/images/example3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/example3.png


--------------------------------------------------------------------------------
/lmms_eval/tasks/cmmmu/_cmmmu.yaml:
--------------------------------------------------------------------------------
1 | group: cmmmu
2 | task:
3 | - cmmmu_val
4 | - cmmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa.yaml:
--------------------------------------------------------------------------------
1 | group: docvqa
2 | task:
3 | - docvqa_val
4 | - docvqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/mmt_mi.yaml:
--------------------------------------------------------------------------------
1 | group: mmt_mi
2 | task:
3 | - mmt_mi_val
4 | - mmt_mi_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/vatex/_vatex.yaml:
--------------------------------------------------------------------------------
1 | group : vatex
2 | task:
3 | - vatex_val_zh
4 | - vatex_test


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import VideoChatGPTLlamaForCausalLM
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa.yaml:
--------------------------------------------------------------------------------
1 | group: iconqa
2 | task:
3 | - iconqa_val
4 | - iconqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | # Implement your code here.
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/nocaps/nocaps.yaml:
--------------------------------------------------------------------------------
1 | group : nocaps
2 | task:
3 |   - nocaps_test
4 |   - nocaps_val


--------------------------------------------------------------------------------
/lmms_eval/tasks/textvqa/_textvqa.yaml:
--------------------------------------------------------------------------------
1 | group: textvqa
2 | task:
3 | - textvqa_val
4 | - textvqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/websrc/websrc.yaml:
--------------------------------------------------------------------------------
1 | group: websrc
2 | task:
3 | - websrc_val
4 | - websrc_test
5 | 


--------------------------------------------------------------------------------
/images/1733320415394.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/1733320415394.jpg


--------------------------------------------------------------------------------
/images/1733397891872.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/1733397891872.jpg


--------------------------------------------------------------------------------
/images/performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/performance.png


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa.yaml:
--------------------------------------------------------------------------------
1 | group: infovqa
2 | task:
3 | - infovqa_val
4 | - infovqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/synthdog/synthdog.yaml:
--------------------------------------------------------------------------------
1 | group: synthdog
2 | task:
3 | - synthdog_en
4 | - synthdog_zh


--------------------------------------------------------------------------------
/lmms_eval/tasks/textcaps/textcaps.yaml:
--------------------------------------------------------------------------------
1 | group : textcaps
2 | task:
3 |   - textcaps_val
4 |   - textcaps_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/worldqa/worldqa.yaml:
--------------------------------------------------------------------------------
1 | group: worldqa
2 | task:
3 | - worldqa_gen
4 | - worldqa_mc
5 | 
6 | 


--------------------------------------------------------------------------------
/images/flashsloth_yellow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/images/flashsloth_yellow.png


--------------------------------------------------------------------------------
/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: vizwiz_vqa
2 | task:
3 | - vizwiz_vqa_val
4 | - vizwiz_vqa_test


--------------------------------------------------------------------------------
/flashsloth/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .language_model.flashsloth import FlashSlothConfig, FlashSlothForCausalLM
2 | 
3 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmmu/arial.ttf


--------------------------------------------------------------------------------
/lmms_eval/tasks/pope/pope_full.yaml:
--------------------------------------------------------------------------------
1 | group : pope_full
2 | task:
3 |   - pope_adv
4 |   - pope_pop
5 |   - pope_random


--------------------------------------------------------------------------------
/lmms_eval/tasks/scienceqa/scienceqa_full.yaml:
--------------------------------------------------------------------------------
1 | group: scienceqa_full
2 | task:
3 |   - scienceqa
4 |   - scienceqa_img


--------------------------------------------------------------------------------
/lmms_eval/tasks/arc/arc_challenge.yaml:
--------------------------------------------------------------------------------
1 | include: arc_easy.yaml
2 | task: arc_challenge
3 | dataset_name: ARC-Challenge
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco2014_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2014_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco2017_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2017_cap
2 | task:
3 |   - coco2017_cap_val
4 |   - coco2017_cap_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_test"
2 | test_split: test
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_val"
2 | test_split: val
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multidocvqa/multidocvqa.yaml:
--------------------------------------------------------------------------------
1 | group: multidocvqa
2 | task:
3 | - multidocvqa_val
4 | - multidocvqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/_screenspot.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot
2 | task:
3 | - screenspot_reg_test
4 | - screenspot_rec_test


--------------------------------------------------------------------------------
/lmms_eval/loggers/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation_tracker import EvaluationTracker
2 | from .wandb_logger import WandbLogger
3 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/mmmu_group_img.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_group_img
2 | task:
3 | - mmmu_val_group_img
4 | - mmmu_test_group_img
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/nextqa/nextqa.yaml:
--------------------------------------------------------------------------------
1 | group: nextqa
2 | task:
3 | - nextqa_oe_test
4 | - nextqa_oe_val
5 | - nextqa_mc_test
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml:
--------------------------------------------------------------------------------
1 | task: ok_vqa_val2014
2 | test_split: val2014
3 | include: _default_template_vqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/qbench/qbenchs_dev.yaml:
--------------------------------------------------------------------------------
1 | group: qbenchs_dev
2 | task:
3 | - qbench_dev
4 | - qbench2_dev
5 | - abench_dev
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/wild_vision_bench/wildvision_bench.yaml:
--------------------------------------------------------------------------------
1 | group: wildvision
2 | task: 
3 |   - wildvision_0617
4 |   - wildvision_0630


--------------------------------------------------------------------------------
/flashsloth/eval/webpage/figures/bard.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/webpage/figures/bard.jpg


--------------------------------------------------------------------------------
/flashsloth/eval/webpage/figures/llama.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/webpage/figures/llama.jpg


--------------------------------------------------------------------------------
/flashsloth/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/eval/webpage/figures/alpaca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/webpage/figures/alpaca.png


--------------------------------------------------------------------------------
/flashsloth/eval/webpage/figures/vicuna.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/webpage/figures/vicuna.jpeg


--------------------------------------------------------------------------------
/lmms_eval/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/internal_eval/internal_eval.yaml:
--------------------------------------------------------------------------------
1 | group: internal_eval
2 | task:
3 | - d170_cn
4 | - d170_en
5 | - dc100_en
6 | - dc200_cn
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2406.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2406"
2 | dataset_name: 2024-06
3 | include: live_bench_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2407.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2407"
2 | dataset_name: 2024-07
3 | include: live_bench_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2409.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2409"
2 | dataset_name: 2024-09
3 | include: live_bench_template_yaml_v2
4 | 


--------------------------------------------------------------------------------
/lmms_eval/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/__pycache__/__main__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/__pycache__/__main__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/task.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/task.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_math.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Math
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_math"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/flashsloth/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/__pycache__/constants.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/__pycache__/constants.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/__pycache__/mm_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/__pycache__/mm_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/save.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/save.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/__pycache__/evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/__pycache__/evaluator.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/filter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/filter.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/group.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/group.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/metrics.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/metrics.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/detailcaps/_default_template_detailcaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     prompt: "Describe this image in detail."


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_music.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Music
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_music"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_val
3 | test_split: val
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/flashsloth/__pycache__/conversation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/__pycache__/conversation.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/fusion.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/fusion.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/ldpnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/ldpnet.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/align.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/align.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/train.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/train.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/instance.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/instance.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/registry.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/api/__pycache__/samplers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/api/__pycache__/samplers.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/caching/__pycache__/cache.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/caching/__pycache__/cache.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/loggers/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/loggers/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/llava.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/llava.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_biology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Biology
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_biology"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_design.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Design
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_design"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_manage.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Manage
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_manage"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_physics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Physics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_physics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/_default_template_yaml:
--------------------------------------------------------------------------------
1 | generation_kwargs:
2 |   max_new_tokens: 16
3 | 
4 | metadata:
5 |   version: 0.0
6 |   interleaved_format: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_val
3 | include: _default_template_seg_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_val
3 | test_split: val
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_test
3 | test_split: test
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg
2 | task: refcocog_seg_val
3 | include: _default_template_seg_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/lqformer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/lqformer.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/pooling.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/pooling.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/finetune.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/finetune.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/pretrain.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/pretrain.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/__pycache__/evaluator_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/__pycache__/evaluator_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/caching/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/caching/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/filters/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/filters/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/loggers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/loggers/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/flash_hd.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/flash_hd.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/imp_llava.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/imp_llava.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/llava_hr.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/llava_hr.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/ai2d/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/ai2d/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/gqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_chemistry.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Chemistry
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_chemistry"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_economics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Economics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_economics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_finance.yaml:
--------------------------------------------------------------------------------
1 | 
2 | dataset_name: Finance
3 | tag: "jmmmu_culture_agnostic"
4 | task: "jmmmu_finance"
5 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_marketing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Marketing
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_marketing"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_materials.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Materials
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_materials"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_pharmacy.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Pharmacy
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_pharmacy"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mme/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mme/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmmu/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/_default_template_yaml:
--------------------------------------------------------------------------------
1 | generation_kwargs:
2 |   max_new_tokens: 256
3 | 
4 | metadata:
5 |   version: 0.0
6 |   interleaved_format: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/mmmu_pro_cot.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_pro_cot
2 | task:
3 | - mmmu_pro_vision_cot
4 | - mmmu_pro_composite_cot
5 | - mmmu_pro_original_cot
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/pope/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/pope/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_val
3 | include: _default_template_bbox_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_testA
3 | include: _default_template_seg_yaml
4 | test_split: testA
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_testB
3 | include: _default_template_seg_yaml
4 | test_split: testB
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_test
3 | test_split: test
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_testA
3 | test_split: testA
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_testB
3 | test_split: testB
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_testA
3 | test_split: testA
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_testB
3 | test_split: testB
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox
2 | task: refcocog_bbox_val
3 | include: _default_template_bbox_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg
2 | task: refcocog_seg_test
3 | include: _default_template_seg_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/flashsloth/eval/__pycache__/eval_textvqa.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/__pycache__/eval_textvqa.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/llava_arch.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/llava_arch.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/finetune_hd.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/finetune_hd.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/filters/__pycache__/extraction.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/filters/__pycache__/extraction.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/filters/__pycache__/selection.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/filters/__pycache__/selection.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/FlashSloth.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/FlashSloth.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/docvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_accounting.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Accounting
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_accounting"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_psychology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Psychology
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_psychology"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmvet/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmvet/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     prompt: "Provide a one-sentence caption for the provided image."


--------------------------------------------------------------------------------
/lmms_eval/tasks/ok_vqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/ok_vqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_testA
3 | include: _default_template_bbox_yaml
4 | test_split: testA
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_testB
3 | include: _default_template_bbox_yaml
4 | test_split: testB
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog
2 | task:
3 | - refcocog_seg_test
4 | - refcocog_seg_val
5 | - refcocog_bbox_test
6 | - refcocog_bbox_val
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox
2 | task: refcocog_bbox_test
3 | include: _default_template_bbox_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/screenspot_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_rec
2 | task: screenspot_rec_test
3 | include: _default_template_rec_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/screenspot_reg_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_reg
2 | task: screenspot_reg_test
3 | include: _default_template_reg_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     prompt: Provide a one-sentence caption for the provided image.


--------------------------------------------------------------------------------
/lmms_eval/tasks/vqav2/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/vqav2/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/eval/__pycache__/m4c_evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/__pycache__/m4c_evaluator.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/lqformer_new.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/lqformer_new.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/train/__pycache__/llava_trainer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/train/__pycache__/llava_trainer.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/loggers/__pycache__/wandb_logger.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/loggers/__pycache__/wandb_logger.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/__pycache__/FlashSloth_HD.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/models/__pycache__/FlashSloth_HD.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/chartqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/chartqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test
5 |   - coco2017_cap_val
6 |   - coco2017_cap_test
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/infovqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_agriculture.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Agriculture
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_agriculture"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_electronics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Electronics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_electronics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_art.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_Art
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_art"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mathvista/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/ocrbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/ocrbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec
2 | task: refcoco_bbox_rec_val
3 | test_split: val
4 | include: _default_template_bbox_rec_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/scienceqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/scienceqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/seedbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/seedbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/textvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/textvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/videochatgpt/_videochatgpt.yaml:
--------------------------------------------------------------------------------
1 | group: videochatgpt
2 | task:
3 |   - videochatgpt_gen
4 |   - videochatgpt_temporal
5 |   - videochatgpt_consistency
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/youcook2/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/YouCook2
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: YouCookIIVideos
6 | 


--------------------------------------------------------------------------------
/flashsloth/eval/__pycache__/model_vqa_loader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/__pycache__/model_vqa_loader.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/eval/__pycache__/model_vqa_mmbench.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/__pycache__/model_vqa_mmbench.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/eval/__pycache__/model_vqa_science.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/eval/__pycache__/model_vqa_science.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/__pycache__/learnable_token.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/__pycache__/learnable_token.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/filters/__pycache__/transformation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/filters/__pycache__/transformation.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_main_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_public_health.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Public_Health
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_public_health"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_world_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: World_History
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_world_history"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/__pycache__/cc_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmbench/__pycache__/cc_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/__pycache__/cn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmbench/__pycache__/cn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/__pycache__/en_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmbench/__pycache__/en_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/olympiadbench/olympiadbench.yaml:
--------------------------------------------------------------------------------
1 | group: olympiadbench
2 | task:
3 | - olympiadbench_test_en
4 | - olympiadbench_test_cn
5 | metadata:
6 |   - version: 0.0
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/realworldqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/realworldqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec
2 | task: refcoco+_bbox_rec_val
3 | include: _default_template_bbox_rec_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec
2 | task: refcoco_bbox_rec_test
3 | test_split: test
4 | include: _default_template_bbox_rec_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox_rec
2 | task: refcocog_bbox_rec_val
3 | include: _default_template_bbox_rec_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vizwiz_vqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/vizwiz_vqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/loggers/__pycache__/evaluation_tracker.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/loggers/__pycache__/evaluation_tracker.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_main_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec
2 | task: refcoco+_bbox_rec_testA
3 | include: _default_template_bbox_rec_yaml
4 | test_split: testA
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec
2 | task: refcoco+_bbox_rec_testB
3 | include: _default_template_bbox_rec_yaml
4 | test_split: testB
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec
2 | task: refcoco_bbox_rec_testA
3 | test_split: testA
4 | include: _default_template_bbox_rec_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec
2 | task: refcoco_bbox_rec_testB
3 | test_split: testB
4 | include: _default_template_bbox_rec_yaml
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox_rec
2 | task: refcocog_bbox_rec_test
3 | include: _default_template_bbox_rec_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/__pycache__/file_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/_task_utils/__pycache__/file_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_diamond_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_extended_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/hallusion_bench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/hallusion_bench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_clinical_medicine.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Clinical_Medicine
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_clinical_medicine"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_computer_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Computer_Science
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_computer_science"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_energy_and_power.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Energy_and_Power
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_energy_and_power"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_heritage.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_Heritage
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_heritage"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_History
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_history"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/__pycache__/mmbench_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mmbench/__pycache__/mmbench_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/model/__init__.py:
--------------------------------------------------------------------------------
1 | from lmms_eval.models.video_chatgpt.model.video_chatgpt import (
2 |     VideoChatGPTConfig,
3 |     VideoChatGPTLlamaForCausalLM,
4 | )
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_main_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_diamond_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/__pycache__/mathvista_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/mathvista/__pycache__/mathvista_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/mix_evals_video2text.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_video2text
2 | task:
3 | # - mix_evals_video2text_openconv
4 | - mix_evals_video2text_mc
5 | - mix_evals_video2text_freeform


--------------------------------------------------------------------------------
/flashsloth/model/language_model/__pycache__/flashsloth.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/language_model/__pycache__/flashsloth.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/__pycache__/vqa_eval_metric.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/_task_utils/__pycache__/vqa_eval_metric.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_extended_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/hallusion_bench/__pycache__/evaluate_hb.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/lmms_eval/tasks/hallusion_bench/__pycache__/evaluate_hb.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/internal_eval/_default_template_internal_eval_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     pre_prompt: ""
4 |     post_prompt: ""
5 | process_results_use_image: true
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_pro
2 | task:
3 | - mmmu_pro_vision
4 | # - mmmu_pro_composite # the composite task is excluded from the formal MMMU-Pro evaluation
5 | - mmmu_pro_standard
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/tempcompass/_tempcompass.yaml:
--------------------------------------------------------------------------------
1 | group: tempcompass
2 | task:
3 | - tempcompass_multi_choice
4 | - tempcompass_yes_no
5 | - tempcompass_caption_matching
6 | - tempcompass_captioning
7 | 


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_projector/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_projector/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_diamond_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_extended_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_main_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_basic_medical_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Basic_Medical_Science
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_basic_medical_science"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_mechanical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Mechanical_Engineering
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_mechanical_engineering"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_diamond_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/flashsloth/model/language_model/phi2/__pycache__/modeling_phi.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/language_model/phi2/__pycache__/modeling_phi.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_extended_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_main_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/llava_interleave_bench/interleave_bench.yaml:
--------------------------------------------------------------------------------
1 | group: llava_interleave_bench
2 | task:
3 | - llava_interleave_bench_in_domain
4 | - llava_interleave_bench_out_domain
5 | - llava_interleave_bench_multi_view


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+
2 | task:
3 | - refcoco+_seg_val
4 | - refcoco+_seg_testA
5 | - refcoco+_seg_testB
6 | - refcoco+_bbox_val
7 | - refcoco+_bbox_testA
8 | - refcoco+_bbox_testB
9 | 


--------------------------------------------------------------------------------
/flashsloth/model/language_model/phi2/__pycache__/configuration_phi.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/language_model/phi2/__pycache__/configuration_phi.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_diamond_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_architecture_and_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Architecture_and_Engineering
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_architecture_and_engineering"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench.yaml:
--------------------------------------------------------------------------------
 1 | group: live_bench
 2 | task:
 3 | - live_bench_2406
 4 | - live_bench_2407
 5 | - live_bench_2409
 6 | 
 7 | metadata:
 8 |   api_type: azure
 9 |   eval_with_mini: false
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/mathvista.yaml:
--------------------------------------------------------------------------------
1 | group: mathvista
2 | task:
3 |   - mathvista_testmini
4 |   - mathvista_test
5 | metadata:
6 |   version: 0.0
7 |   gpt_eval_model_name: "gpt-3.5-turbo"
8 |   quick_extract: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/nextqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/NExTQA
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: nextqa
6 | metadata:
7 |   version: 0.0.1
8 |   load_package: True
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vitatecs/_vitatecs.yaml:
--------------------------------------------------------------------------------
1 | group: vitatecs
2 | task:
3 | - vitatecs_direction
4 | - vitatecs_intensity
5 | - vitatecs_sequence
6 | - vitatecs_compositionality
7 | - vitatecs_localization
8 | - vitatecs_type
9 | 


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/siglip/__pycache__/modeling_siglip.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/siglip/__pycache__/modeling_siglip.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_val"
2 | test_split: validation
3 | metric_list:
4 |   - metric: anls
5 |     aggregation: mean
6 |     higher_is_better: true
7 | include: _default_template_docvqa_yaml
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_extended_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/siglip/__pycache__/configuration_siglip.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/siglip/__pycache__/configuration_siglip.cpython-310.pyc


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/siglip/__pycache__/image_processing_imp.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/siglip/__pycache__/image_processing_imp.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_stem.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_stem
 2 | group_alias: stem
 3 | task:
 4 |   - mmlu_stem_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_option.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_option
2 | task:
3 |   - mmaad_option
4 |   - mmiasd_option
5 |   - mmivqd_option
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_diagnostics_and_laboratory_medicine.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Diagnostics_and_Laboratory_Medicine
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_diagnostics_and_laboratory_medicine"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_other.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_other
 2 | group_alias: other
 3 | task:
 4 |   - mmlu_other_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_base.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd_base
 2 | task:
 3 |   - mmaad_base
 4 |   - mmiasd_base
 5 |   - mmivqd_base
 6 | metadata:
 7 |   version: 0.0
 8 |   sys_prompt: ""
 9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"
10 |   


--------------------------------------------------------------------------------
/lmms_eval/tasks/worldqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/worldqa
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: multi-hop-reasoning 
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-4-0613"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_cn.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_cn
2 | task:
3 |   - mmbench_cn_dev
4 |   - mmbench_cn_test
5 |   - mmbench_cn_cc
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 |   sys_prompt: "有如下几个选项："


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_en.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_en
2 | task:
3 |   - mmbench_en_dev
4 |   - mmbench_en_test
5 | metadata:
6 |   version: 0.0
7 |   sys_prompt: "There are several options:"
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 | 


--------------------------------------------------------------------------------
/flashsloth/model/multimodal_encoder/siglip/__pycache__/image_processing_flashsloth.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codefanw/FlashSloth/HEAD/flashsloth/model/multimodal_encoder/siglip/__pycache__/image_processing_flashsloth.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms_eval/tasks/egoschema/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/egoschema
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: egoschema
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "infovqa_val"
2 | test_split: validation
3 | output_type: generate_until
4 | metric_list:
5 |   - metric: anls
6 |     aggregation: mean
7 |     higher_is_better: true
8 | include: _default_template_infovqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_humanities.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_humanities
 2 | group_alias: humanities
 3 | task:
 4 |   - mmlu_humanities_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_instruction.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_instruction
2 | task:
3 |   - mmaad_instruction
4 |   - mmiasd_instruction
5 |   - mmivqd_instruction
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: urdu
4 |     token: True
5 | task: "llava_in_the_wild_urdu"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==2.0.1
 2 | transformers==4.38.0
 3 | sentencepiece==0.1.99
 4 | accelerate==0.23.0
 5 | peft==0.4.0
 6 | bitsandbytes==0.41.0
 7 | scikit-learn==1.2.2
 8 | einops==0.6.1
 9 | deepspeed==0.9.5
10 | pillow
11 | shortuuid
12 | numpy<2.0.0


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: arabic
4 |     token: True
5 | task: "llava_in_the_wild_arabic"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: french
4 |     token: True
5 | task: "llava_in_the_wild_french"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: hindi
4 |     token: True
5 | task: "llava_in_the_wild_hindi"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |   config: spanish
4 |   token: True
5 | task: "llava_in_the_wild_spanish"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/_refcoco.yaml:
--------------------------------------------------------------------------------
 1 | group: refcoco
 2 | task:
 3 | - refcoco_seg_test
 4 | - refcoco_seg_val
 5 | - refcoco_seg_testA
 6 | - refcoco_seg_testB
 7 | - refcoco_bbox_test
 8 | - refcoco_bbox_val
 9 | - refcoco_bbox_testA
10 | - refcoco_bbox_testB
11 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: bengali
4 |     token: True
5 | task: "llava_in_the_wild_bengali"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: chinese
4 |     token: True
5 | task: "llava_in_the_wild_chinese"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: russian
4 |     token: True
5 | task: "llava_in_the_wild_russian"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/textvqa/textvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: textvqa_test
2 | test_split: test
3 | metric_list:
4 |   - metric: submission
5 |     aggregation: !function utils.textvqa_aggregate_submissions
6 |     higher_is_better: true
7 | include: _default_template_textvqa_yaml
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu
 2 | task:
 3 |   - mmlu_stem
 4 |   - mmlu_other
 5 |   - mmlu_social_sciences
 6 |   - mmlu_humanities
 7 | aggregate_metric_list:
 8 |   - metric: acc
 9 |     weight_by_size: True
10 | metadata:
11 |   version: 2
12 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: japanese
4 |     token: True
5 | task: "llava_in_the_wild_japanese"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/perceptiontest/test/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/PerceptionTest
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_social_sciences
 2 | group_alias: social sciences
 3 | task:
 4 |   - mmlu_social_sciences_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/perceptiontest/val/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/PerceptionTest_Val
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest_val
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms_eval/tasks/llava_interleave_bench/_default_template_interleave_yaml:
--------------------------------------------------------------------------------
1 | output_type: generate_until
2 | generation_kwargs:
3 |   until:
4 |     - "ASSISTANT:"
5 |   image_aspect_ratio: pad
6 | metadata:
7 |   version: 0.0
8 |   api_type : openai
9 |   gpt_eval_model_name: "gpt-3.5-turbo"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/mathvista_testmini.yaml:
--------------------------------------------------------------------------------
1 | group: mathvista_testmini
2 | task:
3 |   - mathvista_testmini_cot
4 |   - mathvista_testmini_solution
5 |   - mathvista_testmini_format
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-3.5-turbo"
9 |   quick_extract: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml:
--------------------------------------------------------------------------------
1 | task: mmbench_cn_test
2 | test_split: test
3 | metric_list:
4 |   - metric: submission
5 |     aggregation: !function cn_utils.mmbench_aggregate_test_results
6 |     higher_is_better: true
7 | include: _default_template_mmbench_cn_yaml
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_en_test.yaml:
--------------------------------------------------------------------------------
1 | task: "mmbench_en_test"
2 | test_split: test
3 | include: _default_template_mmbench_en_yaml
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function en_utils.mmbench_aggregate_test_results
7 |     higher_is_better: true
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_count
3 | dataset_name: action_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_count
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_count
3 | dataset_name: moving_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_count
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_state_change.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_state_change
3 | dataset_name: state_change
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: state_change
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/_default_template_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     pre_prompt: ""
4 |     post_prompt: "\nAnswer the question using a single character from the given options."
5 | generation_kwargs:
6 |   max_new_tokens: 8
7 | metadata:
8 |   version: 0.0
9 |   task_type: image


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_antonym
3 | dataset_name: action_antonym
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_antonym
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_shuffle
3 | dataset_name: object_shuffle
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_shuffle
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/vitatecs/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lscpku/VITATECS
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: vitatecs
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: "\nPlease response with a single letter (A or B):"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_anatomy"
7 | "task_alias": "anatomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_sequence
3 | dataset_name: action_sequence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_sequence
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_character_order.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_character_order
3 | dataset_name: character_order
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: character_order
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_virology"
7 | "task_alias": "virology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_prediction
3 | dataset_name: action_prediction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_prediction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_fine_grained_pose
3 | dataset_name: fine_grained_pose
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_pose
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_attribute
3 | dataset_name: moving_attribute
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_attribute
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_direction
3 | dataset_name: moving_direction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_direction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_existence
3 | dataset_name: object_existence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_existence
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_scene_transition
3 | dataset_name: scene_transition
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: scene_transition
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_unexpected_action
3 | dataset_name: unexpected_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: unexpected_action
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/file_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
def generate_submission_file(file_name, args, subpath="submissions"):
    """Return the absolute path at which a submission file should be written.

    The directory that will hold the file is created if it does not already
    exist; the file itself is not created or opened here.

    Args:
        file_name: Name of the submission file, joined onto the directory.
        args: Object exposing an ``output_path`` attribute. It may be ``None``
            or carry ``output_path=None``; in both cases ``subpath`` is
            resolved relative to the current working directory instead of
            raising.
        subpath: Sub-directory (under ``output_path``) that holds submissions.

    Returns:
        The absolute path of ``file_name`` inside the submission directory.
    """
    # Tolerate a missing args object / unset output path: previously this
    # crashed with AttributeError or TypeError before any file was produced.
    output_path = getattr(args, "output_path", None)
    if output_path is None:
        directory = subpath
    else:
        directory = os.path.join(output_path, subpath)

    os.makedirs(directory, exist_ok=True)
    return os.path.abspath(os.path.join(directory, file_name))
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_astronomy"
7 | "task_alias": "astronomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_marketing"
7 | "task_alias": "marketing"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_nutrition"
7 | "task_alias": "nutrition"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_interaction
3 | dataset_name: object_interaction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_interaction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_management"
7 | "task_alias": "management"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_localization
3 | dataset_name: action_localization
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_localization
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_fine_grained_action
3 | dataset_name: fine_grained_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_action
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/CMMMU
2 | output_type: generate_until
3 | doc_to_visual: !function utils.cmmmu_doc_to_visual
4 | doc_to_text: !function utils.cmmmu_doc_to_text
5 | doc_to_target: "answer"
6 | generation_kwargs:
7 |   max_new_tokens: 16
8 |   image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_philosophy"
7 | "task_alias": "philosophy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_prehistory"
7 | "task_alias": "prehistory"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_sociology"
7 | "task_alias": "sociology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_anatomy_generative"
7 | "task_alias": "anatomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vqav2/vqav2_test.yaml:
--------------------------------------------------------------------------------
1 | task: "vqav2_test"
2 | include: _default_template_vqav2_yaml
3 | test_split: test
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.vqav2_aggregate_submissions
7 |     higher_is_better: true
8 | process_results: !function utils.vqav2_process_results_test
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_aging"
7 | "task_alias": "human_aging"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_virology_generative"
7 | "task_alias": "virology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_egocentric_navigation
3 | dataset_name: egocentric_navigation
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: egocentric_navigation
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_test"
2 | test_split: test
3 | process_results: !function utils.docvqa_test_process_results
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.docvqa_test_aggregate_results
7 |     higher_is_better: true
8 | include: _default_template_docvqa_yaml
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_global_facts"
7 | "task_alias": "global_facts"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_miscellaneous"
7 | "task_alias": "miscellaneous"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_astronomy_generative"
7 | "task_alias": "astronomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_marketing_generative"
7 | "task_alias": "marketing"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_nutrition_generative"
7 | "task_alias": "nutrition"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/wild_vision_bench/wild_vision_bench0617.yaml:
--------------------------------------------------------------------------------
 1 | task: wildvision_0617
 2 | dataset_name: release_bench_0617_with_modelresponse 
 3 | test_split: test500
 4 | output_type: generate_until
 5 | include: _default_template_yaml
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/wild_vision_bench/wild_vision_bench0630.yaml:
--------------------------------------------------------------------------------
 1 | task: wildvision_0630
 2 | dataset_name: release_bench_0630_with_modelresponse 
 3 | test_split: test500
 4 | output_type: generate_until
 5 | include: _default_template_yaml
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench.yaml:
--------------------------------------------------------------------------------
 1 | group: mmbench
 2 | task:
 3 |   - mmbench_en_dev
 4 |   - mmbench_en_test
 5 |   - mmbench_cn_dev
 6 |   - mmbench_cn_test
 7 |   - mmbench_cn_cc
 8 |   - mmbench_ru_dev
 9 | metadata:
10 |   version: 0.0
11 |   sys_prompt: "There are several options:"
12 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_international_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_logical_fallacies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_public_relations"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_security_studies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_econometrics"
7 | "task_alias": "econometrics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_formal_logic"
7 | "task_alias": "formal_logic"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_jurisprudence"
7 | "task_alias": "jurisprudence"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_management_generative"
7 | "task_alias": "management"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_counterfactual_inference
3 | dataset_name: counterfactual_inference
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: counterfactual_inference
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/cvrr/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/CVRR-ES
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: cvrr-es
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0125


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathverse/mathverse_testmini_text.yaml:
--------------------------------------------------------------------------------
 1 | group: mathverse_testmini_text
 2 | task:
 3 |   - mathverse_testmini_text_lite
 4 |   - mathverse_testmini_text_dominant
 5 |   - mathverse_testmini_text_only
 6 | metadata:
 7 |   version: 0.0
 8 |   gpt_eval_model_name: "gpt-3.5-turbo"
 9 |   trunk_response: 30
10 |   quick_match: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_us_foreign_policy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_biology"
7 | "task_alias": "college_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_physics"
7 | "task_alias": "college_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_aging_generative"
7 | "task_alias": "human_aging"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_philosophy_generative"
7 | "task_alias": "philosophy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_prehistory_generative"
7 | "task_alias": "prehistory"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_sociology_generative"
7 | "task_alias": "sociology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/constants.py:
--------------------------------------------------------------------------------
 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30
 2 | WORKER_HEART_BEAT_INTERVAL = 15
 3 | 
 4 | LOGDIR = "."
 5 | 
 6 | 
 7 | # Defining model
 8 | DEFAULT_VIDEO_TOKEN = "<video>"
 9 | DEFAULT_VIDEO_PATCH_TOKEN = "<vid_patch>"
10 | DEFAULT_VID_START_TOKEN = "<vid_start>"
11 | DEFAULT_VID_END_TOKEN = "<vid_end>"
12 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_abstract_algebra"
7 | "task_alias": "abstract_algebra"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_business_ethics"
7 | "task_alias": "business_ethics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_machine_learning"
7 | "task_alias": "machine_learning"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_disputes"
7 | "task_alias": "moral_disputes"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_global_facts_generative"
7 | "task_alias": "global_facts"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/utils.py:
--------------------------------------------------------------------------------
def disable_torch_init():
    """
    Turn off torch's default parameter initialization to speed up model creation.

    Replaces ``reset_parameters`` on ``torch.nn.Linear`` and
    ``torch.nn.LayerNorm`` with a function that does nothing and returns
    ``None``. The patch is applied to the classes themselves, so it affects
    every instance in the process from the moment this function is called.
    """
    import torch

    # Same order as before: Linear first, then LayerNorm.
    for layer_cls in (torch.nn.Linear, torch.nn.LayerNorm):
        setattr(layer_cls, "reset_parameters", lambda self: None)
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_electrical_engineering"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "elementary_mathematics"
2 | "description": "The following are questions (with answers) about elementary\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_elementary_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_mathematics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_statistics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school statistics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_statistics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_chemistry"
7 | "task_alias": "college_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_medicine"
7 | "task_alias": "college_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_computer_security"
7 | "task_alias": "computer_security"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_medical_genetics"
7 | "task_alias": "medical_genetics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_scenarios"
7 | "task_alias": "moral_scenarios"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_world_religions"
7 | "task_alias": "world_religions"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_formal_logic_generative"
7 | "task_alias": "formal_logic"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_miscellaneous_generative"
7 | "task_alias": "miscellaneous"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathverse/mathverse_testmini_vision.yaml:
--------------------------------------------------------------------------------
 1 | group: mathverse_testmini_vision
 2 | task:
 3 |   - mathverse_testmini_vision_intensive
 4 |   - mathverse_testmini_vision_dominant
 5 |   - mathverse_testmini_vision_only
 6 | metadata:
 7 |   version: 0.0
 8 |   gpt_eval_model_name: "gpt-3.5-turbo"
 9 |   trunk_response: 30
10 |   quick_match: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_computer_science"
2 | "description": "The following are questions (with answers) about college\
3 |   \ computer science.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_computer_science"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_geography"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school geography.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_geography"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_us_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school us history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_us_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_accounting"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ accounting.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_accounting"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_conceptual_physics"
7 | "task_alias": "conceptual_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_sexuality"
7 | "task_alias": "human_sexuality"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_law"
7 | "task_alias": "professional_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_econometrics_generative"
7 | "task_alias": "econometrics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_jurisprudence_generative"
7 | "task_alias": "jurisprudence"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/videochatgpt/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoChatGPT
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0613


--------------------------------------------------------------------------------
/lmms_eval/tasks/vqav2/vqav2_val.yaml:
--------------------------------------------------------------------------------
 1 | task: "vqav2_val"
 2 | include: _default_template_vqav2_yaml
 3 | test_split: validation
 4 | metric_list:
 5 |   - metric: exact_match
 6 |     aggregation: mean
 7 |     higher_is_better: true
 8 |     ignore_case: true
 9 |     ignore_punctuation: true
10 | process_results: !function utils.vqav2_process_results_val
11 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_psychology"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school psychology.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_psychology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_clinical_knowledge"
7 | "task_alias": "clinical_knowledge"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_mathematics"
7 | "task_alias": "college_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_biology"
7 | "task_alias": "high_school_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_physics"
7 | "task_alias": "high_school_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_international_law"
7 | "task_alias": "international_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_logical_fallacies"
7 | "task_alias": "logical_fallacies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_public_relations"
7 | "task_alias": "public_relations"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_security_studies"
7 | "task_alias": "security_studies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_business_ethics_generative"
7 | "task_alias": "business_ethics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_biology_generative"
7 | "task_alias": "college_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_physics_generative"
7 | "task_alias": "college_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_disputes_generative"
7 | "task_alias": "moral_disputes"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_world_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school world history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_world_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_psychology"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ psychology.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_psychology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_us_foreign_policy"
7 | "task_alias": "us_foreign_policy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_abstract_algebra_generative"
7 | "task_alias": "abstract_algebra"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_medicine_generative"
7 | "task_alias": "college_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_machine_learning_generative"
7 | "task_alias": "machine_learning"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_medical_genetics_generative"
7 | "task_alias": "medical_genetics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_scenarios_generative"
7 | "task_alias": "moral_scenarios"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_world_religions_generative"
7 | "task_alias": "world_religions"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_computer_science"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school computer science.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_computer_science"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_chemistry"
7 | "task_alias": "high_school_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_chemistry_generative"
7 | "task_alias": "college_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_computer_security_generative"
7 | "task_alias": "computer_security"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_sexuality_generative"
7 | "task_alias": "human_sexuality"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_law_generative"
7 | "task_alias": "professional_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_law.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_law"
4 | task_alias: "law"
5 | process_docs: !function utils.process_law
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd
 2 | task:
 3 |   - mmaad_base
 4 |   - mmaad_option
 5 |   - mmaad_instruction
 6 |   - mmiasd_base
 7 |   - mmiasd_option
 8 |   - mmiasd_instruction
 9 |   - mmivqd_base
10 |   - mmivqd_option
11 |   - mmivqd_instruction
12 | metadata:
13 |   version: 0.0
14 |   sys_prompt: ""
15 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_macroeconomics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school macroeconomics.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_macroeconomics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_microeconomics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school microeconomics.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_microeconomics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are multiple choice questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_electrical_engineering"
7 | "task_alias": "electrical_engineering"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "elementary_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about elementary\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_elementary_mathematics"
7 | "task_alias": "elementary_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_statistics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school statistics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_statistics"
7 | "task_alias": "high_school_statistics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_medicine"
7 | "task_alias": "professional_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_international_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_logical_fallacies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_public_relations"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_security_studies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_clinical_knowledge_generative"
7 | "task_alias": "clinical_knowledge"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_conceptual_physics_generative"
7 | "task_alias": "conceptual_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_international_law_generative"
7 | "task_alias": "international_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_logical_fallacies_generative"
7 | "task_alias": "logical_fallacies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_public_relations_generative"
7 | "task_alias": "public_relations"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_security_studies_generative"
7 | "task_alias": "security_studies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_math.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about math. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_math"
4 | task_alias: "math"
5 | process_docs: !function utils.process_math
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmaad_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmaad_base"
 2 | test_split: test
 3 | dataset_name: mmaad_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmaad_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "infovqa_test"
 2 | test_split: test
 3 | output_type: generate_until
 4 | process_results: !function utils.infovqa_test_process_results
 5 | metric_list:
 6 |   - metric: submission
 7 |     aggregation: !function utils.infovqa_test_aggregate_results
 8 |     higher_is_better: true
 9 | include: _default_template_infovqa_yaml
10 |   


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_european_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school european history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_european_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_mathematics"
7 | "task_alias": "high_school_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_us_foreign_policy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_mathematics_generative"
7 | "task_alias": "college_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_biology_generative"
7 | "task_alias": "high_school_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_physics_generative"
7 | "task_alias": "high_school_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_us_foreign_policy_generative"
7 | "task_alias": "us_foreign_policy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_other.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_other"
4 | task_alias: "other"
5 | process_docs: !function utils.process_other
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmiasd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmiasd_base"
 2 | test_split: test
 3 | dataset_name: mmiasd_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmiasd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmivqd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmivqd_base"
 2 | test_split: test
 3 | dataset_name: mmivqd_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmivqd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/video_detail_description/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoDetailDescription
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0613


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_computer_science"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ computer science.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_computer_science"
7 | "task_alias": "college_computer_science"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_geography"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school geography.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_geography"
7 | "task_alias": "high_school_geography"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_us_history"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school us history.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_us_history"
7 | "task_alias": "high_school_us_history"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_accounting"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ accounting.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_accounting"
7 | "task_alias": "professional_accounting"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_professional_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_health.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about health. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_health"
4 | task_alias: "health"
5 | process_docs: !function utils.process_health
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_psychology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school psychology.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_psychology"
7 | "task_alias": "high_school_psychology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are multiple choice questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_electrical_engineering"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "elementary_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about elementary\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_elementary_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_statistics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school statistics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_statistics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_high_school_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_high_school_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_international_law"
7 | 


--------------------------------------------------------------------------------