├── llava-ov-15 └── src │ ├── __init__.py │ ├── serve │ └── __init__.py │ ├── train │ └── __init__.py │ ├── trainer │ └── __init__.py │ └── dataset │ └── __init__.py ├── lmms-eval ├── lmms_eval │ ├── __init__.py │ ├── api │ │ └── __init__.py │ ├── caching │ │ └── __init__.py │ ├── models │ │ ├── model_utils │ │ │ └── __init__.py │ │ └── video_chatgpt │ │ │ ├── eval │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── model │ │ │ └── __init__.py │ │ │ ├── constants.py │ │ │ └── utils.py │ ├── tasks │ │ ├── ocrbench_v2 │ │ │ ├── __init__.py │ │ │ └── spotting_eval │ │ │ │ └── __init__.py │ │ ├── _task_utils │ │ │ ├── gpt_eval_utils.py │ │ │ └── file_utils.py │ │ ├── cuva │ │ │ ├── cuva.yaml │ │ │ └── _default_template_yaml │ │ ├── multilingual-llava-bench-in-the-wild │ │ │ ├── README.md │ │ │ ├── urdu_llava_in_the_wild.yaml │ │ │ ├── arabic_llava_in_the_wild.yaml │ │ │ ├── french_llava_in_the_wild.yaml │ │ │ ├── hindi_llava_in_the_wild.yaml │ │ │ ├── spanish_llava_in_the_wild.yaml │ │ │ ├── bengali_llava_in_the_wild.yaml │ │ │ ├── chinese_llava_in_the_wild.yaml │ │ │ ├── russian_llava_in_the_wild.yaml │ │ │ └── japanese_llava_in_the_wild.yaml │ │ ├── funqa │ │ │ ├── funqa.yaml │ │ │ └── _default_template_yaml │ │ ├── mmt │ │ │ ├── mmt.yaml │ │ │ ├── mmt_mi.yaml │ │ │ └── _default_template_yaml │ │ ├── ok_vqa │ │ │ ├── _ok_vqa.yaml │ │ │ └── ok_vqa_val2014.yaml │ │ ├── flickr30k │ │ │ └── flickr30k.yaml │ │ ├── mmmu │ │ │ ├── mmmu.yaml │ │ │ ├── mmmu_group_img.yaml │ │ │ ├── arial.ttf │ │ │ └── _default_template_yaml │ │ ├── vqav2 │ │ │ └── _vqav2.yaml │ │ ├── cmmmu │ │ │ ├── _cmmmu.yaml │ │ │ └── _default_template_cmmmu_yaml │ │ ├── docvqa │ │ │ ├── docvqa.yaml │ │ │ ├── docvqa_val.yaml │ │ │ └── docvqa_test.yaml │ │ ├── mmau │ │ │ ├── mmau.yaml │ │ │ ├── mmau_test_mini.yaml │ │ │ └── mmau_test.yaml │ │ ├── iconqa │ │ │ ├── iconqa.yaml │ │ │ ├── iconqa_test.yaml │ │ │ └── iconqa_val.yaml │ │ ├── mmsearch │ │ │ └── retrieve_content │ │ │ │ └── tokenization │ │ │ │ 
└── __init__.py │ │ ├── nocaps │ │ │ ├── nocaps.yaml │ │ │ └── _default_template_nocaps_yaml │ │ ├── textvqa │ │ │ ├── _textvqa.yaml │ │ │ └── textvqa_test.yaml │ │ ├── websrc │ │ │ └── websrc.yaml │ │ ├── fleurs │ │ │ ├── fleurs.yaml │ │ │ ├── fleurs_en.yaml │ │ │ ├── fleurs_cmn_hans_cn.yaml │ │ │ └── fleurs_yue_hant_hk.yaml │ │ ├── infovqa │ │ │ ├── infovqa.yaml │ │ │ ├── infovqa_val.yaml │ │ │ └── infovqa_test.yaml │ │ ├── synthdog │ │ │ └── synthdog.yaml │ │ ├── covost2 │ │ │ ├── covost2.yaml │ │ │ ├── covost2_en_zh.yaml │ │ │ ├── covost2_zh_en.yaml │ │ │ ├── covost2_zh_en_dev.yaml │ │ │ └── covost2_zh_en_test.yaml │ │ ├── textcaps │ │ │ ├── textcaps.yaml │ │ │ └── _default_template_textcaps_yaml │ │ ├── worldqa │ │ │ ├── worldqa.yaml │ │ │ └── _default_template_yaml │ │ ├── vizwiz_vqa │ │ │ └── _vizwiz_vqa.yaml │ │ ├── pope │ │ │ └── pope_full.yaml │ │ ├── scienceqa │ │ │ └── scienceqa_full.yaml │ │ ├── arc │ │ │ └── arc_challenge.yaml │ │ ├── clotho_aqa │ │ │ ├── clotho_aqa.yaml │ │ │ └── _default_template_yaml │ │ ├── coco_cap │ │ │ ├── coco2014_cap.yaml │ │ │ ├── coco2017_cap.yaml │ │ │ ├── coco_karpathy.yaml │ │ │ └── coco_cap.yaml │ │ ├── gigaspeech │ │ │ └── gigaspeech.yaml │ │ ├── multidocvqa │ │ │ └── multidocvqa.yaml │ │ ├── screenspot │ │ │ ├── _screenspot.yaml │ │ │ ├── screenspot_rec_test.yaml │ │ │ └── screenspot_reg_test.yaml │ │ ├── vstar_bench │ │ │ ├── __init__.py │ │ │ ├── vstar_bench.yaml │ │ │ ├── vstar_bench_direct_attributes.yaml │ │ │ └── vstar_bench_relative_position.yaml │ │ ├── megabench │ │ │ ├── megabench_core.yaml │ │ │ ├── megabench_open.yaml │ │ │ ├── megabench_core_si.yaml │ │ │ ├── megabench_open_si.yaml │ │ │ ├── megabench.yaml │ │ │ ├── metrics │ │ │ │ ├── parsing │ │ │ │ │ └── dummy_parse.py │ │ │ │ ├── __init__.py │ │ │ │ ├── aggregation │ │ │ │ │ └── unsupported_agg.py │ │ │ │ └── scoring │ │ │ │ │ └── unsupported_scoring.py │ │ │ └── requirements.txt │ │ ├── nextqa │ │ │ ├── nextqa.yaml │ │ │ └── _default_template_yaml │ 
│ ├── qbench │ │ │ └── qbenchs_dev.yaml │ │ ├── wild_vision_bench │ │ │ └── wildvision_bench.yaml │ │ ├── capability │ │ │ ├── capability_OCR.yaml │ │ │ ├── capability_action.yaml │ │ │ ├── capability_event.yaml │ │ │ ├── capability_scene.yaml │ │ │ ├── capability_style.yaml │ │ │ ├── capability_camera_angle.yaml │ │ │ ├── capability_object_color.yaml │ │ │ ├── capability_object_number.yaml │ │ │ ├── capability_camera_movement.yaml │ │ │ ├── capability_object_category.yaml │ │ │ ├── capability_spatial_relation.yaml │ │ │ ├── capability_dynamic_object_number.yaml │ │ │ └── capability_character_identification.yaml │ │ ├── internal_eval │ │ │ ├── internal_eval.yaml │ │ │ └── _default_template_internal_eval_yaml │ │ ├── live_bench │ │ │ ├── live_bench_2406.yaml │ │ │ ├── live_bench_2407.yaml │ │ │ ├── live_bench_2409.yaml │ │ │ └── live_bench.yaml │ │ ├── livexiv_tqa │ │ │ ├── livexiv_tqa_v3.yaml │ │ │ ├── livexiv_tqa_v4.yaml │ │ │ ├── livexiv_tqa_v5.yaml │ │ │ ├── livexiv_tqa_v6.yaml │ │ │ ├── livexiv_tqa_v1.yaml │ │ │ ├── livexiv_tqa_v2.yaml │ │ │ └── livexiv_tqa.yaml │ │ ├── livexiv_vqa │ │ │ ├── livexiv_vqa_v3.yaml │ │ │ ├── livexiv_vqa_v4.yaml │ │ │ ├── livexiv_vqa_v5.yaml │ │ │ ├── livexiv_vqa_v6.yaml │ │ │ ├── livexiv_vqa_v1.yaml │ │ │ ├── livexiv_vqa_v2.yaml │ │ │ └── livexiv_vqa.yaml │ │ ├── mix_evals │ │ │ ├── audio2text │ │ │ │ ├── mix_evals_audio2text.yaml │ │ │ │ ├── mix_evals_audio2text_hard.yaml │ │ │ │ └── _default_template_yaml │ │ │ ├── image2text │ │ │ │ ├── mix_evals_image2text.yaml │ │ │ │ └── mix_evals_image2text_hard.yaml │ │ │ └── video2text │ │ │ │ ├── mix_evals_video2text.yaml │ │ │ │ └── mix_evals_video2text_hard.yaml │ │ ├── open_asr │ │ │ ├── openasr_ami.yaml │ │ │ ├── openasr_tedlium.yaml │ │ │ ├── openasr_voxpopuli.yaml │ │ │ ├── openasr_earnings22.yaml │ │ │ ├── openasr_gigaspeech.yaml │ │ │ ├── openasr_librispeech.yaml │ │ │ ├── openasr_spgispeech.yaml │ │ │ ├── openasr_common_voice.yaml │ │ │ ├── openasr_librispeech_test_clean.yaml │ │ 
│ ├── openasr_librispeech_test_other.yaml │ │ │ └── openasr.yaml │ │ ├── jmmmu │ │ │ ├── jmmmu_math.yaml │ │ │ ├── jmmmu_music.yaml │ │ │ ├── jmmmu_biology.yaml │ │ │ ├── jmmmu_design.yaml │ │ │ ├── jmmmu_manage.yaml │ │ │ ├── jmmmu_physics.yaml │ │ │ ├── jmmmu_chemistry.yaml │ │ │ ├── jmmmu_economics.yaml │ │ │ ├── jmmmu_finance.yaml │ │ │ ├── jmmmu_marketing.yaml │ │ │ ├── jmmmu_materials.yaml │ │ │ ├── jmmmu_pharmacy.yaml │ │ │ ├── jmmmu_accounting.yaml │ │ │ ├── jmmmu_psychology.yaml │ │ │ ├── jmmmu_agriculture.yaml │ │ │ ├── jmmmu_electronics.yaml │ │ │ ├── jmmmu_japanese_art.yaml │ │ │ ├── jmmmu_public_health.yaml │ │ │ ├── jmmmu_world_history.yaml │ │ │ ├── jmmmu_clinical_medicine.yaml │ │ │ ├── jmmmu_computer_science.yaml │ │ │ ├── jmmmu_energy_and_power.yaml │ │ │ ├── jmmmu_japanese_heritage.yaml │ │ │ ├── jmmmu_japanese_history.yaml │ │ │ ├── jmmmu_basic_medical_science.yaml │ │ │ ├── jmmmu_mechanical_engineering.yaml │ │ │ ├── jmmmu_architecture_and_engineering.yaml │ │ │ └── jmmmu_diagnostics_and_laboratory_medicine.yaml │ │ ├── detailcaps │ │ │ └── _default_template_detailcaps_yaml │ │ ├── refcoco │ │ │ ├── refcoco_seg_val.yaml │ │ │ ├── refcoco_bbox_val.yaml │ │ │ ├── refcoco_seg_test.yaml │ │ │ ├── refcoco_bbox_test.yaml │ │ │ ├── refcoco_bbox_testA.yaml │ │ │ ├── refcoco_bbox_testB.yaml │ │ │ ├── refcoco_seg_testA.yaml │ │ │ ├── refcoco_seg_testB.yaml │ │ │ ├── refcoco_bbox_rec_val.yaml │ │ │ ├── refcoco_bbox_rec_test.yaml │ │ │ ├── refcoco_bbox_rec_testA.yaml │ │ │ ├── refcoco_bbox_rec_testB.yaml │ │ │ └── _refcoco.yaml │ │ ├── refcoco+ │ │ │ ├── refcoco+_seg_val.yaml │ │ │ ├── refcoco+_bbox_val.yaml │ │ │ ├── refcoco+_seg_testA.yaml │ │ │ ├── refcoco+_seg_testB.yaml │ │ │ ├── refcoco+_bbox_testA.yaml │ │ │ ├── refcoco+_bbox_testB.yaml │ │ │ ├── refcoco+_bbox_rec_val.yaml │ │ │ ├── refcoco+_bbox_rec_testA.yaml │ │ │ ├── refcoco+_bbox_rec_testB.yaml │ │ │ └── _refcoco.yaml │ │ ├── refcocog │ │ │ ├── refcocog_seg_val.yaml │ │ │ ├── 
refcocog_bbox_val.yaml │ │ │ ├── refcocog_seg_test.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── refcocog_bbox_test.yaml │ │ │ ├── refcocog_bbox_rec_val.yaml │ │ │ └── refcocog_bbox_rec_test.yaml │ │ ├── common_voice_15 │ │ │ ├── common_voice_15.yaml │ │ │ ├── common_voice_15_en.yaml │ │ │ ├── common_voice_15_fr.yaml │ │ │ └── common_voice_15_zh-CN.yaml │ │ ├── librispeech │ │ │ ├── librispeech_long.yaml │ │ │ ├── librispeech.yaml │ │ │ ├── librispeech_test_clean_long.yaml │ │ │ └── librispeech_test_other_long.yaml │ │ ├── mmmu_pro │ │ │ ├── _default_template_yaml │ │ │ ├── mmmu_pro_cot.yaml │ │ │ └── mmmu_pro.yaml │ │ ├── videommmu │ │ │ └── video_mmmu.yaml │ │ ├── videochatgpt │ │ │ ├── _videochatgpt.yaml │ │ │ └── _default_template_yaml │ │ ├── vlmsareblind │ │ │ └── __init__.py │ │ ├── youcook2 │ │ │ └── _default_template_yaml │ │ ├── gpqa │ │ │ ├── n_shot │ │ │ │ ├── gpqa_main_n_shot.yaml │ │ │ │ ├── gpqa_diamond_n_shot.yaml │ │ │ │ └── gpqa_extended_n_shot.yaml │ │ │ ├── zeroshot │ │ │ │ ├── gpqa_main_zeroshot.yaml │ │ │ │ ├── gpqa_diamond_zeroshot.yaml │ │ │ │ └── gpqa_extended_zeroshot.yaml │ │ │ ├── cot_n_shot │ │ │ │ ├── gpqa_main_cot_n_shot.yaml │ │ │ │ ├── gpqa_diamond_cot_n_shot.yaml │ │ │ │ └── gpqa_extended_cot_n_shot.yaml │ │ │ ├── cot_zeroshot │ │ │ │ ├── gpqa_main_cot_zeroshot.yaml │ │ │ │ ├── gpqa_diamond_cot_zeroshot.yaml │ │ │ │ └── gpqa_extended_cot_zeroshot.yaml │ │ │ └── generative │ │ │ │ ├── gpqa_main_generative_n_shot.yaml │ │ │ │ ├── gpqa_diamond_generative_n_shot.yaml │ │ │ │ └── gpqa_extended_generative_n_shot.yaml │ │ ├── illusionvqa │ │ │ ├── illusionvqa_comprehension.yaml │ │ │ └── illusionvqa_soft_localization.yaml │ │ ├── olympiadbench │ │ │ └── olympiadbench.yaml │ │ ├── temporalbench │ │ │ └── temporalbench.yaml │ │ ├── hrbench │ │ │ └── hrbench.yaml │ │ ├── air_bench │ │ │ ├── _default_template_yaml │ │ │ ├── air_bench_chat.yaml │ │ │ └── air_bench_foundation.yaml │ │ ├── tempcompass │ │ │ └── _tempcompass.yaml │ │ ├── egothink │ │ │ 
└── _default_template_yaml │ │ ├── llava_interleave_bench │ │ │ ├── interleave_bench.yaml │ │ │ └── _default_template_interleave_yaml │ │ ├── mathvista │ │ │ ├── mathvista.yaml │ │ │ └── mathvista_testmini.yaml │ │ ├── vitatecs │ │ │ ├── _vitatecs.yaml │ │ │ └── _default_template_yaml │ │ ├── mmlu │ │ │ ├── default │ │ │ │ ├── _mmlu_stem.yaml │ │ │ │ ├── _mmlu_other.yaml │ │ │ │ ├── _mmlu_humanities.yaml │ │ │ │ ├── _mmlu.yaml │ │ │ │ ├── _mmlu_social_sciences.yaml │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ ├── 
mmlu_electrical_engineering.yaml │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ └── mmlu_professional_accounting.yaml │ │ │ ├── continuation │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ 
│ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ └── mmlu_high_school_european_history.yaml │ │ │ ├── generative │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ 
│ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ └── mmlu_us_foreign_policy.yaml │ │ │ ├── flan_cot_zeroshot │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ └── mmlu_professional_medicine.yaml │ │ │ └── flan_n_shot │ │ │ │ ├── generative │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── 
mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ └── loglikelihood │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ └── mmlu_miscellaneous.yaml │ │ ├── mmupd │ │ │ ├── mmupd_option.yaml │ │ │ ├── mmupd_base.yaml │ │ │ ├── mmupd_instruction.yaml │ │ │ ├── mmupd.yaml │ │ │ ├── mmaad_base.yaml │ │ │ ├── mmiasd_base.yaml │ │ │ └── mmivqd_base.yaml │ │ ├── mmbench │ │ │ ├── mmbench_cn.yaml │ │ │ ├── mmbench_en.yaml │ │ │ ├── mmbench_cn_test.yaml │ │ │ ├── mmbench_en_test.yaml │ │ │ └── mmbench.yaml │ │ ├── vdc │ │ │ └── _default_template_yaml │ │ ├── egoschema │ │ │ └── _default_template_yaml │ │ ├── perceptiontest │ │ │ ├── test │ │ │ │ └── _default_template_yaml │ │ │ └── val │ │ │ │ └── _default_template_yaml │ │ ├── mvbench │ │ │ ├── mvbench_action_count.yaml │ │ │ ├── mvbench_moving_count.yaml │ │ │ ├── mvbench_state_change.yaml │ │ │ ├── mvbench_action_antonym.yaml │ │ │ ├── mvbench_object_shuffle.yaml │ │ │ ├── mvbench_action_sequence.yaml │ │ │ ├── mvbench_character_order.yaml │ 
│ │ ├── mvbench_action_prediction.yaml │ │ │ ├── mvbench_fine_grained_pose.yaml │ │ │ ├── mvbench_moving_attribute.yaml │ │ │ ├── mvbench_moving_direction.yaml │ │ │ ├── mvbench_object_existence.yaml │ │ │ ├── mvbench_scene_transition.yaml │ │ │ ├── mvbench_unexpected_action.yaml │ │ │ ├── mvbench_object_interaction.yaml │ │ │ ├── mvbench_action_localization.yaml │ │ │ ├── mvbench_fine_grained_action.yaml │ │ │ ├── mvbench_egocentric_navigation.yaml │ │ │ └── mvbench_counterfactual_inference.yaml │ │ ├── cvrr │ │ │ └── _default_template_yaml │ │ ├── mathverse │ │ │ ├── mathverse_testmini_text.yaml │ │ │ └── mathverse_testmini_vision.yaml │ │ ├── mmlu_pro │ │ │ ├── mmlu_pro_law.yaml │ │ │ ├── mmlu_pro_math.yaml │ │ │ └── mmlu_pro_other.yaml │ │ └── video_detail_description │ │ │ └── _default_template_yaml │ └── loggers │ │ └── __init__.py ├── miscs │ ├── llava_result_check.md │ ├── llava_sglang_result_check.md │ ├── test_scienceqa.py │ └── example_eval.yaml ├── tools │ ├── live_bench │ │ ├── live_bench │ │ │ ├── data_generator │ │ │ │ ├── utils │ │ │ │ │ └── __init__.py │ │ │ │ ├── example │ │ │ │ │ └── example_website.png │ │ │ │ └── __init__.py │ │ │ ├── driver │ │ │ │ ├── .gitignore │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── websites │ │ │ │ └── __init__.py │ │ │ └── screen_shoter │ │ │ │ └── __init__.py │ │ └── setup.py │ └── lite │ │ ├── embedder │ │ └── __init__.py │ │ └── shrinker │ │ └── __init__.py ├── setup.py ├── .github │ └── issue_template.md └── examples │ └── models │ ├── aria.sh │ └── xai_grok.sh ├── qwen-vl-utils ├── .python-version └── src │ └── qwen_vl_utils │ └── __init__.py └── docs ├── logo.png └── framework.png /llava-ov-15/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llava-ov-15/src/serve/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /llava-ov-15/src/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/miscs/llava_result_check.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qwen-vl-utils/.python-version: -------------------------------------------------------------------------------- 1 | 3.8.19 2 | -------------------------------------------------------------------------------- /lmms-eval/miscs/llava_sglang_result_check.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ocrbench_v2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/models/video_chatgpt/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/_task_utils/gpt_eval_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/driver/.gitignore: -------------------------------------------------------------------------------- 1 | extensions/ 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cuva/cuva.yaml: -------------------------------------------------------------------------------- 1 | group : cuva 2 | task: 3 | - cuva_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/funqa/funqa.yaml: -------------------------------------------------------------------------------- 1 | group : funqa 2 | task: 3 | - funqa_test -------------------------------------------------------------------------------- /docs/logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/docs/logo.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/mmt.yaml: -------------------------------------------------------------------------------- 1 | group: mmt 2 | task: 3 | - mmt_val 4 | - mmt_test -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: 3 | - ok_vqa_val2014 -------------------------------------------------------------------------------- /docs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/docs/framework.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/flickr30k/flickr30k.yaml: -------------------------------------------------------------------------------- 1 | group: flickr30k 2 | task: 3 | - flickr30k_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu 2 | task: 3 | - mmmu_val 4 | - mmmu_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vqav2/_vqav2.yaml: -------------------------------------------------------------------------------- 1 | group: vqav2 2 | task: 3 | - vqav2_val 4 | - vqav2_test 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cmmmu/_cmmmu.yaml: -------------------------------------------------------------------------------- 1 | group: cmmmu 2 | task: 3 | - cmmmu_val 4 | - cmmmu_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa.yaml: -------------------------------------------------------------------------------- 1 | group: docvqa 2 | task: 3 | - docvqa_val 4 | - docvqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau.yaml: -------------------------------------------------------------------------------- 1 | group: mmau 2 | task: 3 | - mmau_test_mini 4 | - mmau_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/mmt_mi.yaml: -------------------------------------------------------------------------------- 1 | group: mmt_mi 2 | task: 3 | - mmt_mi_val 4 | - mmt_mi_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/video_chatgpt/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import VideoChatGPTLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa.yaml: -------------------------------------------------------------------------------- 1 | group: iconqa 2 | task: 3 | - iconqa_val 4 | - iconqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | # Implement your code here. 
2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nocaps/nocaps.yaml: -------------------------------------------------------------------------------- 1 | group : nocaps 2 | task: 3 | - nocaps_test 4 | - nocaps_val -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textvqa/_textvqa.yaml: -------------------------------------------------------------------------------- 1 | group: textvqa 2 | task: 3 | - textvqa_val 4 | - textvqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/websrc/websrc.yaml: -------------------------------------------------------------------------------- 1 | group: websrc 2 | task: 3 | - websrc_val 4 | - websrc_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs.yaml: -------------------------------------------------------------------------------- 1 | group: fleurs 2 | task: 3 | - fleurs_en 4 | - fleurs_cmn_hans_cn -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/infovqa/infovqa.yaml: -------------------------------------------------------------------------------- 1 | group: infovqa 2 | task: 3 | - infovqa_val 4 | - infovqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/synthdog/synthdog.yaml: -------------------------------------------------------------------------------- 1 | group: synthdog 2 | task: 3 | - synthdog_en 4 | - synthdog_zh -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2.yaml: -------------------------------------------------------------------------------- 1 | group: covost2 2 | task: 3 | - covost2_en_zh 4 | - covost2_zh_en 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textcaps/textcaps.yaml: -------------------------------------------------------------------------------- 1 | group : textcaps 2 | task: 3 | - textcaps_val 4 | - textcaps_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/worldqa/worldqa.yaml: -------------------------------------------------------------------------------- 1 | group: worldqa 2 | task: 3 | - worldqa_gen 4 | - worldqa_mc 5 | 6 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/driver/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.driver.load_driver import load_driver 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_en.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: en_us 2 | include: _default_template_yaml 3 | task: fleurs_en -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: 3 | - vizwiz_vqa_val 4 | - vizwiz_vqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/pope/pope_full.yaml: -------------------------------------------------------------------------------- 1 | group : pope_full 2 | task: 3 | - pope_adv 4 | - pope_pop 5 | - pope_random -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/scienceqa/scienceqa_full.yaml: -------------------------------------------------------------------------------- 1 | group: scienceqa_full 2 | task: 3 | - scienceqa 4 | 
- scienceqa_img -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/arc/arc_challenge.yaml: -------------------------------------------------------------------------------- 1 | include: arc_easy.yaml 2 | task: arc_challenge 3 | dataset_name: ARC-Challenge 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/clotho_aqa/clotho_aqa.yaml: -------------------------------------------------------------------------------- 1 | group: clotho_aqa 2 | task: 3 | - clotho_aqa_val 4 | - clotho_aqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco2014_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2014_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco2017_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2017_cap 2 | task: 3 | - coco2017_cap_val 4 | - coco2017_cap_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gigaspeech/gigaspeech.yaml: -------------------------------------------------------------------------------- 1 | group: gigaspeech 2 | task: 3 | - gigaspeech_dev 4 | - gigaspeech_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_test" 2 | test_split: test 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa_val.yaml: 
-------------------------------------------------------------------------------- 1 | task: "iconqa_val" 2 | test_split: val 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multidocvqa/multidocvqa.yaml: -------------------------------------------------------------------------------- 1 | group: multidocvqa 2 | task: 3 | - multidocvqa_val 4 | - multidocvqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/_screenspot.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot 2 | task: 3 | - screenspot_reg_test 4 | - screenspot_rec_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/__init__.py: -------------------------------------------------------------------------------- 1 | # V* Benchmark: Guided Visual Search as a Core Mechanism in Multimodal LLMs 2 | -------------------------------------------------------------------------------- /lmms-eval/tools/lite/embedder/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseEmbedder import BaseEmbedder 2 | from .ClipBgeEmbedder import ClipBgeEmbedder 3 | -------------------------------------------------------------------------------- /lmms-eval/tools/lite/shrinker/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseShrinker import BaseShrinker 2 | from .EmbedShrinker import Embed_Shrinker 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation_tracker import EvaluationTracker 2 | from .wandb_logger import WandbLogger 3 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco_karpathy.yaml: -------------------------------------------------------------------------------- 1 | group : coco_karpathy 2 | task: 3 | - coco_karpathy_val 4 | - coco_karpathy_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_en_zh.yaml: -------------------------------------------------------------------------------- 1 | group: covost2_en_zh 2 | task: 3 | - covost2_en_zh_test 4 | - covost2_en_zh_dev -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en.yaml: -------------------------------------------------------------------------------- 1 | group: covost2_zh_en 2 | task: 3 | - covost2_zh_en_test 4 | - covost2_zh_en_dev -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_core.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: core 2 | task: "megabench_core" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_open.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: open 2 | task: "megabench_open" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/mmmu_group_img.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_group_img 2 | task: 3 | - mmmu_val_group_img 4 | - mmmu_test_group_img 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nextqa/nextqa.yaml: 
-------------------------------------------------------------------------------- 1 | group: nextqa 2 | task: 3 | - nextqa_oe_test 4 | - nextqa_oe_val 5 | - nextqa_mc_test 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml: -------------------------------------------------------------------------------- 1 | task: ok_vqa_val2014 2 | test_split: val2014 3 | include: _default_template_vqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/qbench/qbenchs_dev.yaml: -------------------------------------------------------------------------------- 1 | group: qbenchs_dev 2 | task: 3 | - qbench_dev 4 | - qbench2_dev 5 | - abench_dev 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/lmms-eval/lmms_eval/tasks/mmmu/arial.ttf -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/wild_vision_bench/wildvision_bench.yaml: -------------------------------------------------------------------------------- 1 | group: wildvision 2 | task: 3 | - wildvision_0617 4 | - wildvision_0630 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_OCR.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_OCR 4 | dataset_name: OCR 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "covost2_zh_en_dev" 2 | include: 
_default_template_zh_en_yaml 3 | test_split: dev 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_cmn_hans_cn.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: cmn_hans_cn 2 | task: fleurs_cmn_hans_cn 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_yue_hant_hk.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: yue_hant_hk 2 | include: _default_template_yaml 3 | task: fleurs_yue_hant_hk -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_action 4 | dataset_name: action 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_event.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_event 4 | dataset_name: event 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_scene.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_scene 4 | dataset_name: scene 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_style.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_style 4 | dataset_name: style 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "covost2_zh_en_test" 2 | include: _default_template_zh_en_yaml 3 | test_split: test 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/internal_eval/internal_eval.yaml: -------------------------------------------------------------------------------- 1 | group: internal_eval 2 | task: 3 | - d170_cn 4 | - d170_en 5 | - dc100_en 6 | - dc200_cn 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2406.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2406" 2 | dataset_name: 2024-06 3 | include: live_bench_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2407.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2407" 2 | dataset_name: 2024-07 3 | include: live_bench_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2409.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2409" 2 | dataset_name: 2024-09 3 | include: live_bench_template_yaml_v2 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v3.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v3" 2 | dataset_name: "v3-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v4.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v4" 2 | dataset_name: "v4-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v5.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v5" 2 | dataset_name: "v5-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v6.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v6" 2 | dataset_name: "v6-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v3.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v3" 2 | dataset_name: "v3-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v4.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v4" 2 | dataset_name: "v4-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v5.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v5" 2 | dataset_name: "v5-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v6.yaml: 
-------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v6" 2 | dataset_name: "v6-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_audio2text 2 | task: 3 | - mix_evals_audio2_text_freeform 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_ami.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: ami 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_ami -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_math.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Math 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_math" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v1.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v1" 2 | dataset_name: "TQA-2024-09-21" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v2.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v2" 2 | dataset_name: "TQA-2024-10-26" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v1.yaml: 
-------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v1" 2 | dataset_name: "VQA-2024-09-21" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v2.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v2" 2 | dataset_name: "VQA-2024-10-26" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_core_si.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: core_single_image 2 | task: "megabench_core_si" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_open_si.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: open_single_image 2 | task: "megabench_open_si" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/detailcaps/_default_template_detailcaps_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: "Describe this image in detail." 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_music.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Music 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_music" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_tedlium.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: tedlium 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_tedlium -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_val 3 | test_split: val 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_biology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Biology 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_biology" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_design.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Design 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_design" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_manage.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Manage 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_manage" 4 | include: _default_template_yaml 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_physics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Physics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_physics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench.yaml: -------------------------------------------------------------------------------- 1 | group: megabench 2 | task: 3 | - megabench_core 4 | - megabench_open 5 | - megabench_core_si 6 | - megabench_open_si -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_audio2text_hard 2 | task: 3 | - mix_evals_audio2_text_freeform_hard 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/_default_template_yaml: -------------------------------------------------------------------------------- 1 | generation_kwargs: 2 | max_new_tokens: 16 3 | 4 | metadata: 5 | version: 0.0 6 | interleaved_format: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_voxpopuli.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: voxpopuli 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_voxpopuli -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_val 3 | include: _default_template_seg_yaml 4 | 
test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_val 3 | test_split: val 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_test 3 | test_split: test 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_val 3 | include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_camera_angle.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_camera_angle 4 | dataset_name: camera_angle 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_color.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_color 4 | dataset_name: object_color 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15.yaml: -------------------------------------------------------------------------------- 1 | group: common_voice_15 2 | task: 3 | - common_voice_15_zh-CN 4 
| - common_voice_15_en 5 | - common_voice_15_fr -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_chemistry.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Chemistry 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_chemistry" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_economics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Economics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_economics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_finance.yaml: -------------------------------------------------------------------------------- 1 | 2 | dataset_name: Finance 3 | tag: "jmmmu_culture_agnostic" 4 | task: "jmmmu_finance" 5 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_marketing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Marketing 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_marketing" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_materials.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Materials 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_materials" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_pharmacy.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: 
Pharmacy 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_pharmacy" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_long.yaml: -------------------------------------------------------------------------------- 1 | group: librispeech_long 2 | task: 3 | - librispeech_test_clean_long 4 | - librispeech_test_other_long 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/_default_template_yaml: -------------------------------------------------------------------------------- 1 | generation_kwargs: 2 | max_new_tokens: 256 3 | 4 | metadata: 5 | version: 0.0 6 | interleaved_format: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/mmmu_pro_cot.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_pro_cot 2 | task: 3 | - mmmu_pro_vision_cot 4 | - mmmu_pro_composite_cot 5 | - mmmu_pro_original_cot 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_earnings22.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: earnings22 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_earnings22 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_gigaspeech.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: gigaspeech 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_gigaspeech -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech.yaml: 
-------------------------------------------------------------------------------- 1 | group: openasr_librispeech 2 | task: 3 | - open_asr_librispeech_test_other 4 | - open_asr_librispeech_test_clean -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_spgispeech.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: spgispeech 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_spgispeech -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testA 3 | include: _default_template_seg_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testB 3 | include: _default_template_seg_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_test 3 | test_split: test 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testA 3 | test_split: testA 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testB 3 | test_split: testB 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testA 3 | test_split: testA 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testB 3 | test_split: testB 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_test 3 | include: _default_template_seg_yaml 4 | test_split: test 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/videommmu/video_mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: video_mmmu 2 | task: 3 | - video_mmmu_adaptation 4 | - video_mmmu_comprehension 5 | - video_mmmu_perception 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_number.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_number 4 | dataset_name: object_number 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_accounting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Accounting 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_accounting" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_psychology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Psychology 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_psychology" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_common_voice.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: common_voice 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_common_voice -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testA 3 
| include: _default_template_bbox_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testB 3 | include: _default_template_bbox_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog 2 | task: 3 | - refcocog_seg_test 4 | - refcocog_seg_val 5 | - refcocog_bbox_test 6 | - refcocog_bbox_val 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_test 3 | include: _default_template_bbox_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/screenspot_rec_test.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot_rec 2 | task: screenspot_rec_test 3 | include: _default_template_rec_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/screenspot_reg_test.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot_reg 2 | task: screenspot_reg_test 3 | include: _default_template_reg_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml: 
-------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: Provide a one-sentence caption for the provided image. -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/__init__.py: -------------------------------------------------------------------------------- 1 | from .api.live_bench import generate_live_bench, generate_live_bench_from_path 2 | from .data_generator import LiveBench 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_camera_movement.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_camera_movement 4 | dataset_name: camera_movement 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_category.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_category 4 | dataset_name: object_category 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_spatial_relation.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_spatial_relation 4 | dataset_name: spatial_relation 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test 5 | - coco2017_cap_val 6 | - coco2017_cap_test 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_agriculture.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Agriculture 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_agriculture" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_electronics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Electronics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_electronics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_art.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_Art 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_art" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_image2text 2 | task: 3 | - mix_evals_image2text_mc 4 | - mix_evals_image2text_freeform 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_val 3 | test_split: val 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/videochatgpt/_videochatgpt.yaml: -------------------------------------------------------------------------------- 1 | group: videochatgpt 2 
| task: 3 | - videochatgpt_gen 4 | - videochatgpt_temporal 5 | - videochatgpt_consistency 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vlmsareblind/__init__.py: -------------------------------------------------------------------------------- 1 | # VLMs Are Blind benchmark task 2 | # Tests visual reasoning capabilities through path-counting in subway connection diagrams 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/youcook2/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/YouCook2 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: YouCookIIVideos 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_main_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml: -------------------------------------------------------------------------------- 1 | include: illusionvqa.yaml 2 | task: illusionvqa_comprehension 3 | dataset_path: csebuetnlp/illusionVQA-Comprehension -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_public_health.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Public_Health 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_public_health" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_world_history.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_name: World_History 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_world_history" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/olympiadbench/olympiadbench.yaml: -------------------------------------------------------------------------------- 1 | group: olympiadbench 2 | task: 3 | - olympiadbench_test_en 4 | - olympiadbench_test_cn 5 | metadata: 6 | - version: 0.0 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_val 3 | include: _default_template_bbox_rec_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_test 3 | test_split: test 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox_rec 2 | task: refcocog_bbox_rec_val 3 | include: _default_template_bbox_rec_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/temporalbench/temporalbench.yaml: -------------------------------------------------------------------------------- 1 | group: temporalbench 2 | task: 3 | - temporalbench_short_qa 4 | - temporalbench_long_qa 5 | - temporalbench_short_caption 6 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_main_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_testA 3 | include: _default_template_bbox_rec_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_testB 3 | include: _default_template_bbox_rec_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_testA 3 | test_split: testA 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_testB 3 | test_split: testB 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_rec_test.yaml: 
-------------------------------------------------------------------------------- 1 | group: refcocog_bbox_rec 2 | task: refcocog_bbox_rec_test 3 | include: _default_template_bbox_rec_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_dynamic_object_number.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_dynamic_object_number 4 | dataset_name: dynamic_object_number 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_diamond_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_extended_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml: -------------------------------------------------------------------------------- 1 | include: illusionvqa.yaml 2 | task: illusionvqa_soft_localization 3 | dataset_path: csebuetnlp/illusionVQA-Soft-Localization -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_clinical_medicine.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Clinical_Medicine 2 | tag: "jmmmu_culture_agnostic" 3 | task: 
"jmmmu_clinical_medicine" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_computer_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Computer_Science 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_computer_science" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_energy_and_power.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Energy_and_Power 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_energy_and_power" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_heritage.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_Heritage 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_heritage" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_history.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_History 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_history" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/websites/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.websites.load_website import load_websites, load_websites_from_file 2 | from live_bench.websites.website import Website 3 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/models/video_chatgpt/model/__init__.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.models.video_chatgpt.model.video_chatgpt import ( 2 | VideoChatGPTConfig, 3 | VideoChatGPTLlamaForCausalLM, 4 | ) 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_main_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_diamond_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/hrbench/hrbench.yaml: -------------------------------------------------------------------------------- 1 | group: hrbench 2 | task: 3 | - hrbench4k 4 | - hrbench8k 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-3.5-turbo" 8 | max_workers: 1 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech_test_clean.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: librispeech 2 | test_split: test.clean 3 | include: _default_template_yaml 4 | task: open_asr_librispeech_test_clean -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech_test_other.yaml: -------------------------------------------------------------------------------- 1 | 
dataset_name: librispeech 2 | test_split: test.other 3 | include: _default_template_yaml 4 | task: open_asr_librispeech_test_other -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/AIR_Bench 2 | dataset_kwargs: 3 | token: True 4 | 5 | metadata: 6 | gpt_eval_model_name: gpt-4o 7 | version: 0.0 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/air_bench_chat.yaml: -------------------------------------------------------------------------------- 1 | group: air_bench_chat 2 | task: 3 | - air_bench_chat_sound 4 | - air_bench_chat_music 5 | - air_bench_chat_speech 6 | - air_bench_chat_mixed 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_character_identification.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_character_identification 4 | dataset_name: character_identification 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_extended_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/internal_eval/_default_template_internal_eval_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | pre_prompt: "" 4 | post_prompt: "" 5 | process_results_use_image: true 6 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_pro 2 | task: 3 | - mmmu_pro_vision 4 | # - mmmu_pro_composite # removing composite task in formal MMMU-Pro evaluation 5 | - mmmu_pro_standard 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/tempcompass/_tempcompass.yaml: -------------------------------------------------------------------------------- 1 | group: tempcompass 2 | task: 3 | - tempcompass_multi_choice 4 | - tempcompass_yes_no 5 | - tempcompass_caption_matching 6 | - tempcompass_captioning 7 | -------------------------------------------------------------------------------- /lmms-eval/miscs/test_scienceqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | dataset = load_dataset("Otter-AI/ScienceQA", trust_remote_code=True)["test"] 4 | for doc in dataset: 5 | print(doc["id"]) 6 | -------------------------------------------------------------------------------- /qwen-vl-utils/src/qwen_vl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .vision_process import ( 2 | extract_vision_info, 3 | fetch_image, 4 | fetch_video, 5 | process_vision_info, 6 | smart_resize, 7 | ) 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/air_bench_foundation.yaml: -------------------------------------------------------------------------------- 1 | group: air_bench_foundation 2 | task: 3 | - air_bench_foundation_sound 4 | - air_bench_foundation_music 5 | - air_bench_foundation_speech 6 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_diamond_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_extended_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_main_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_basic_medical_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Basic_Medical_Science 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_basic_medical_science" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_mechanical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Mechanical_Engineering 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_mechanical_engineering" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech.yaml: 
-------------------------------------------------------------------------------- 1 | group: librispeech 2 | task: 3 | - librispeech_dev_clean 4 | - librispeech_dev_other 5 | - librispeech_test_clean 6 | - librispeech_test_other 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/egothink/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: EgoLife-v1/Egothink 2 | dataset_kwargs: 3 | token: True 4 | test_split: test 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-4" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_diamond_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_video2text 2 | task: 3 | - mix_evals_video2text_mc 4 | - mix_evals_video2text_freeform 5 | # - mix_evals_video2text_openended -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_extended_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml: 
-------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_main_generative_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/example/example_website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/lmms-eval/tools/live_bench/live_bench/data_generator/example/example_website.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa.yaml: -------------------------------------------------------------------------------- 1 | group: livexiv_tqa 2 | task: 3 | - livexiv_tqa_v1 4 | - livexiv_tqa_v2 5 | - livexiv_tqa_v3 6 | - livexiv_tqa_v4 7 | - livexiv_tqa_v5 8 | - livexiv_tqa_v6 9 | 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: livexiv_vqa 2 | task: 3 | - livexiv_vqa_v1 4 | - livexiv_vqa_v2 5 | - livexiv_vqa_v3 6 | - livexiv_vqa_v4 7 | - livexiv_vqa_v5 8 | - livexiv_vqa_v6 9 | 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/llava_interleave_bench/interleave_bench.yaml: -------------------------------------------------------------------------------- 1 | group: llava_interleave_bench 2 | task: 3 | - llava_interleave_bench_in_domain 4 | - llava_interleave_bench_out_domain 5 | - llava_interleave_bench_multi_view -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/_refcoco.yaml: 
-------------------------------------------------------------------------------- 1 | group: refcoco+ 2 | task: 3 | - refcoco+_seg_val 4 | - refcoco+_seg_testA 5 | - refcoco+_seg_testB 6 | - refcoco+_bbox_val 7 | - refcoco+_bbox_testA 8 | - refcoco+_bbox_testB 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | metric_list: 3 | - metric: vstar_overall_acc 4 | aggregation: !function utils.vstar_aggregate_results 5 | higher_is_better: true -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_diamond_generative_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_architecture_and_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Architecture_and_Engineering 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_architecture_and_engineering" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench.yaml: -------------------------------------------------------------------------------- 1 | group: live_bench 2 | task: 3 | - live_bench_2406 4 | - live_bench_2407 5 | - live_bench_2409 6 | 7 | metadata: 8 | api_type: azure 9 | eval_with_mini: false 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathvista/mathvista.yaml: 
-------------------------------------------------------------------------------- 1 | group: mathvista 2 | task: 3 | - mathvista_testmini 4 | - mathvista_test 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-3.5-turbo" 8 | quick_extract: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nextqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NExTQA 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: nextqa 6 | metadata: 7 | version: 0.0.1 8 | load_package: True 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: "Provide a one-sentence caption for the provided image." 4 | plm: 5 | prompt: "Describe the image briefly." 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vitatecs/_vitatecs.yaml: -------------------------------------------------------------------------------- 1 | group: vitatecs 2 | task: 3 | - vitatecs_direction 4 | - vitatecs_intensity 5 | - vitatecs_sequence 6 | - vitatecs_compositionality 7 | - vitatecs_localization 8 | - vitatecs_type 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cuva/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: fesvhtr/CUVA_LMMs 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: cuva 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_val" 2 | test_split: validation 3 | metric_list: 4 | - metric: anls 5 | aggregation: mean 6 | higher_is_better: true 7 | include: _default_template_docvqa_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/funqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: fesvhtr/FunQA_LMMs 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: funqa 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_extended_generative_n_shot 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_image2text_hard 2 | task: 3 | - mix_evals_image2text_mc_hard 4 | - mix_evals_image2text_freeform_hard 5 | # - mix_evals_image2text_openended -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_video2text_hard 2 | task: 3 | - mix_evals_video2text_mc_hard 4 | - mix_evals_video2text_freeform_hard 5 | # - mix_evals_video2text_openended -------------------------------------------------------------------------------- /llava-ov-15/src/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # from .dpo_trainer import QwenDPOTrainer 2 | from .sft_trainer import QwenSFTTrainer 3 | # from .grpo_trainer import QwenGRPOTrainer 4 | 5 | __all__ = ["QwenSFTTrainer"] -------------------------------------------------------------------------------- /lmms-eval/miscs/example_eval.yaml: -------------------------------------------------------------------------------- 1 | - model: llava 2 | model_args: pretrained=liuhaotian/llava-v1.5-7b 3 | tasks: mmmu_val 4 | batch_size: 1 5 | log_samples: true 6 | log_samples_suffix: eval_mmmu 7 | output_path: "./logs/" 8 | 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_stem.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_stem 2 | group_alias: stem 3 | task: 4 | - mmlu_stem_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | 
version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_option.yaml: -------------------------------------------------------------------------------- 1 | group: mmupd_option 2 | task: 3 | - mmaad_option 4 | - mmiasd_option 5 | - mmivqd_option 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_diagnostics_and_laboratory_medicine.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Diagnostics_and_Laboratory_Medicine 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_diagnostics_and_laboratory_medicine" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_other.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_other 2 | group_alias: other 3 | task: 4 | - mmlu_other_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_base.yaml: -------------------------------------------------------------------------------- 1 | group: mmupd_base 2 | task: 3 | - mmaad_base 4 | - mmiasd_base 5 | - mmivqd_base 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/worldqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/worldqa 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: multi-hop-reasoning 6 | metadata: 7 
| version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/screen_shoter/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.screen_shoter.screen import ScreenImage 2 | from live_bench.screen_shoter.screen_shoter import ( 3 | ScreenShoter, 4 | get_shoter, 5 | register_shoter, 6 | ) 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_cn 2 | task: 3 | - mmbench_cn_dev 4 | - mmbench_cn_test 5 | - mmbench_cn_cc 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | sys_prompt: "有如下几个选项:" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_en.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_en 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | metadata: 6 | version: 0.0 7 | sys_prompt: "There are several options:" 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | -------------------------------------------------------------------------------- /lmms-eval/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools import setup 3 | 4 | # This is to make sure that the package supports editable installs 5 | if __name__ == "__main__": 6 | setuptools.setup( 7 | license_files=["LICENSE"], 8 | ) 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vdc/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: wchai/lmms_VDC_test 2 | dataset_kwargs: 3 | token: True 4 | video: 
True 5 | cache_dir: vdc_test 6 | 7 | metadata: 8 | version: 0.0 9 | gpt_eval_model_name: gpt-4o-mini 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/egoschema/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/egoschema 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: egoschema 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/infovqa/infovqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_val" 2 | test_split: validation 3 | output_type: generate_until 4 | metric_list: 5 | - metric: anls 6 | aggregation: mean 7 | higher_is_better: true 8 | include: _default_template_infovqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py: -------------------------------------------------------------------------------- 1 | class DummyParse: 2 | @staticmethod 3 | def parse(response: str, *args, **kwargs) -> str: 4 | """return the raw string without doing anything""" 5 | return response.strip() 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_humanities.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_humanities 2 | group_alias: humanities 3 | task: 4 | - mmlu_humanities_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_instruction.yaml: 
-------------------------------------------------------------------------------- 1 | group: mmupd_instruction 2 | task: 3 | - mmaad_instruction 4 | - mmiasd_instruction 5 | - mmivqd_instruction 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: urdu 4 | token: True 5 | task: "llava_in_the_wild_urdu" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau_test_mini.yaml: -------------------------------------------------------------------------------- 1 | task: "mmau_test_mini" 2 | test_split: test_mini 3 | 4 | metric_list: 5 | - metric: accuracy 6 | aggregation: !function utils.mmau_aggregate_results 7 | higher_is_better: true 8 | 9 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: arabic 4 | token: True 5 | task: "llava_in_the_wild_arabic" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: french 4 | token: True 5 | task: 
"llava_in_the_wild_french" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: hindi 4 | token: True 5 | task: "llava_in_the_wild_hindi" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: spanish 4 | token: True 5 | task: "llava_in_the_wild_spanish" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco 2 | task: 3 | - refcoco_seg_test 4 | - refcoco_seg_val 5 | - refcoco_seg_testA 6 | - refcoco_seg_testB 7 | - refcoco_bbox_test 8 | - refcoco_bbox_val 9 | - refcoco_bbox_testA 10 | - refcoco_bbox_testB 11 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmau_test" 2 | test_split: test 3 | 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.mmau_aggregate_results_for_submission 7 | higher_is_better: true 8 | 9 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: bengali 4 | token: True 5 | task: "llava_in_the_wild_bengali" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: chinese 4 | token: True 5 | task: "llava_in_the_wild_chinese" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: russian 4 | token: True 5 | task: "llava_in_the_wild_russian" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textvqa/textvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function utils.textvqa_aggregate_submissions 6 | higher_is_better: true 7 | include: _default_template_textvqa_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_test_clean_long.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Librispeech-concat 2 | task : "librispeech_test_clean_long" 3 | test_split: test_clean 4 | process_results: !function 
utils.librispeech_long_process_result 5 | include: _default_yaml_template -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_test_other_long.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Librispeech-concat 2 | task : "librispeech_test_other_long" 3 | test_split: test_other 4 | process_results: !function utils.librispeech_long_process_result 5 | include: _default_yaml_template -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu 2 | task: 3 | - mmlu_stem 4 | - mmlu_other 5 | - mmlu_social_sciences 6 | - mmlu_humanities 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | metadata: 11 | version: 2 12 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: japanese 4 | token: True 5 | task: "llava_in_the_wild_japanese" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/perceptiontest/test/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/PerceptionTest 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: perceptiontest 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/__init__.py: 
-------------------------------------------------------------------------------- 1 | from metrics.aggregation_type import AggregationType 2 | from metrics.metric_type import MetricType 3 | from metrics.response_parse_type import ResponseParseType 4 | 5 | __all__ = ["AggregationType", "MetricType", "ResponseParseType"] 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_social_sciences 2 | group_alias: social sciences 3 | task: 4 | - mmlu_social_sciences_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/perceptiontest/val/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/PerceptionTest_Val 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: perceptiontest_val 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/llava_interleave_bench/_default_template_interleave_yaml: -------------------------------------------------------------------------------- 1 | output_type: generate_until 2 | generation_kwargs: 3 | until: 4 | - "ASSISTANT:" 5 | image_aspect_ratio: pad 6 | metadata: 7 | version: 0.0 8 | api_type : openai 9 | gpt_eval_model_name: "gpt-3.5-turbo" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathvista/mathvista_testmini.yaml: -------------------------------------------------------------------------------- 1 | group: mathvista_testmini 2 | task: 3 | - mathvista_testmini_cot 4 | - mathvista_testmini_solution 5 | - 
mathvista_testmini_format 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo" 9 | quick_extract: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml: -------------------------------------------------------------------------------- 1 | task: mmbench_cn_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function cn_utils.mmbench_aggregate_test_results 6 | higher_is_better: true 7 | include: _default_template_mmbench_cn_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_test" 2 | test_split: test 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function en_utils.mmbench_aggregate_test_results 7 | higher_is_better: true 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_count.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_count 3 | dataset_name: action_count 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_count 8 | post_prompt: "Only give the best option.\n" 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_count 3 | dataset_name: moving_count 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_count 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_state_change.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_state_change 3 | dataset_name: state_change 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: state_change 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_virology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import Dict 3 | 4 | 5 | class UnsupportedAggregation: 6 | @staticmethod 7 | def aggregate(scores: Dict[str, Number], weights: Dict[str, Number]) -> Number: 8 | return -1 9 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py: -------------------------------------------------------------------------------- 1 | class UnsupportedScoring: 2 | """Unsupported scoring.""" 3 | 4 | @staticmethod 5 | def match(response: str, correct_answer: str) -> int: 6 | """Default response for unimplemented metrics.""" 7 | return -1 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_astronomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_marketing" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_nutrition" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/_default_template_yaml: 
-------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | pre_prompt: "" 4 | post_prompt: "\nAnswer the question using a single character from the given options." 5 | generation_kwargs: 6 | max_new_tokens: 8 7 | metadata: 8 | version: 0.0 9 | task_type: image -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_antonym 3 | dataset_name: action_antonym 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_antonym 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_shuffle 3 | dataset_name: object_shuffle 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_shuffle 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vitatecs/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lscpku/VITATECS 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: vitatecs 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "\nPlease response with a single letter (A or B):" -------------------------------------------------------------------------------- /llava-ov-15/src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dpo_dataset import make_dpo_data_module 2 | from 
.sft_dataset import make_supervised_data_module 3 | from .grpo_dataset import make_grpo_data_module 4 | 5 | __all__ =[ 6 | "make_dpo_data_module", 7 | "make_supervised_data_module", 8 | "make_grpo_data_module" 9 | ] -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_management" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_sequence 3 | dataset_name: action_sequence 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_sequence 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_character_order.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_character_order 3 | dataset_name: character_order 4 | 
test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: character_order 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_kwargs: 2 | token: true 3 | dataset_path: lmms-lab/MixEval-X-audio2text 4 | lmms_eval_specific_kwargs: 5 | default: 6 | post_prompt: "" 7 | pre_prompt: "" 8 | metadata: 9 | gpt_eval_model_name: gpt-4o-mini 10 | version: 0 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_aging" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_philosophy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": 
"_continuation_template_yaml" 6 | "task": "mmlu_continuation_prehistory" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_sociology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_prediction 3 | dataset_name: action_prediction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_prediction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_fine_grained_pose 3 | dataset_name: fine_grained_pose 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: fine_grained_pose 8 | post_prompt: "Only 
give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_attribute 3 | dataset_name: moving_attribute 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_attribute 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_direction 3 | dataset_name: moving_direction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_direction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_existence 3 | dataset_name: object_existence 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_existence 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_scene_transition 3 | dataset_name: scene_transition 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: scene_transition 8 | post_prompt: "Only give the best option.\n" 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_unexpected_action 3 | dataset_name: unexpected_action 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: unexpected_action 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/_task_utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_submission_file(file_name, args, subpath="submissions"): 5 | path = os.path.join(args.output_path, subpath) 6 | os.makedirs(path, exist_ok=True) 7 | path = os.path.join(path, file_name) 8 | return os.path.abspath(path) 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_en" 2 | dataset_name: en 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|en|>" 10 | include : _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_fr" 2 | dataset_name: fr 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|fr|>" 10 | include : _default_template_yaml -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_global_facts" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_miscellaneous" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_interaction 3 | dataset_name: object_interaction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_interaction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_formal_logic" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_jurisprudence" 7 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_management" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_localization 3 | dataset_name: action_localization 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_localization 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_fine_grained_action 3 | dataset_name: fine_grained_action 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: fine_grained_action 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr.yaml: -------------------------------------------------------------------------------- 1 | group: openasr 2 | task: 3 | - open_asr_ami 4 | - open_asr_common_voice 5 | - open_asr_earnings22 6 | - open_asr_gigaspeech 7 | - open_asr_librispeech_test_clean 8 | - open_asr_librispeech_test_other 9 | - open_asr_spgispeech 10 | - open_asr_voxpopuli 11 | - open_asr_tedlium -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CMMMU 2 | output_type: generate_until 3 | doc_to_visual: !function utils.cmmmu_doc_to_visual 4 | doc_to_text: !function utils.cmmmu_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_business_ethics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_biology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_physics" 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_econometrics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_disputes" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory" 7 | "task_alias": 
"prehistory" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy_generative" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/.github/issue_template.md: -------------------------------------------------------------------------------- 1 | Before you open an issue, please check if a similar issue already exists or has been closed before. 2 | 3 | ### When you open an issue, please be sure to include the following 4 | 5 | - [ ] A descriptive title: [xxx] XXXX 6 | - [ ] A detailed description 7 | 8 | Thank you for your contributions! 
9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_zh-CN" 2 | dataset_name: zh-CN 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|zh|>" 10 | include : _default_template_yaml 11 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_abstract_algebra" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_medicine" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": 
"mmlu_continuation_machine_learning" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_medical_genetics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_scenarios" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_world_religions" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": 
"_default_template_yaml" 6 | "task": "mmlu_human_aging" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology_generative" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_egocentric_navigation 3 | dataset_name: egocentric_navigation 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: egocentric_navigation 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_test" 2 | test_split: test 3 | process_results: !function utils.docvqa_test_process_results 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.docvqa_test_aggregate_results 7 | higher_is_better: true 8 | include: _default_template_docvqa_yaml 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": 
"mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_chemistry" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_computer_security" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_conceptual_physics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_sexuality" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are questions (with 
answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_law" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": 
"The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy_generative" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing_generative" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition_generative" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | - mmbench_cn_dev 6 | - mmbench_cn_test 7 | - mmbench_cn_cc 8 | - mmbench_ru_dev 9 | metadata: 10 | version: 0.0 11 | sys_prompt: "There are several options:" 12 | gpt_eval_model_name: "gpt-3.5-turbo-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | 
"dataset_name": "clinical_knowledge" 2 | "description": "The following are questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_mathematics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_biology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_physics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_international_law.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_international_law" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_logical_fallacies" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_public_relations" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_security_studies" 7 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_jurisprudence" 7 | "task_alias": "jurisprudence" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_astronomy" 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_marketing" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_nutrition" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_virology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 
6 | "task": "mmlu_management_generative" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_counterfactual_inference 3 | dataset_name: counterfactual_inference 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: counterfactual_inference 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.data_generator.live_bench import LiveBench 2 | from live_bench.data_generator.live_bench_data import LiveBenchData 3 | from live_bench.data_generator.qa_generator import get_generator, get_random_generator 4 | from live_bench.data_generator.response import Response 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cvrr/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CVRR-ES 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: cvrr-es 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" 10 | 11 | metadata: 12 | version: 0.0 13 | gpt_eval_model_name: gpt-3.5-turbo-0125 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_text.yaml: -------------------------------------------------------------------------------- 1 | group: mathverse_testmini_text 2 | task: 3 | - mathverse_testmini_text_lite 4 | - mathverse_testmini_text_dominant 5 | - mathverse_testmini_text_only 6 | metadata: 7 
| version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo" 9 | trunk_response: 30 10 | quick_match: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice 
questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_management" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_aging_generative" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy_generative" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_prehistory.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory_generative" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology_generative" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench_direct_attributes.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: "vstar_bench_direct_attributes" 3 | dataset_kwargs: 4 | category: "direct_attributes" 5 | metric_list: 6 | - metric: vstar_direct_attributes_acc 7 | aggregation: !function utils.vstar_aggregate_results 8 | higher_is_better: true -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench_relative_position.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: "vstar_bench_relative_position" 3 | dataset_kwargs: 4 | category: "relative_position" 5 | metric_list: 6 | - metric: vstar_relative_position_acc 7 | aggregation: !function utils.vstar_aggregate_results 8 | higher_is_better: true 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/video_chatgpt/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 5 | 6 | 7 | # Defining model 8 | DEFAULT_VIDEO_TOKEN = "