├── llava-ov-15 └── src │ ├── __init__.py │ ├── serve │ └── __init__.py │ ├── train │ └── __init__.py │ ├── trainer │ └── __init__.py │ └── dataset │ └── __init__.py ├── lmms-eval ├── lmms_eval │ ├── __init__.py │ ├── api │ │ └── __init__.py │ ├── caching │ │ └── __init__.py │ ├── models │ │ ├── model_utils │ │ │ └── __init__.py │ │ └── video_chatgpt │ │ │ ├── eval │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── model │ │ │ └── __init__.py │ │ │ ├── constants.py │ │ │ └── utils.py │ ├── tasks │ │ ├── ocrbench_v2 │ │ │ ├── __init__.py │ │ │ └── spotting_eval │ │ │ │ └── __init__.py │ │ ├── _task_utils │ │ │ ├── gpt_eval_utils.py │ │ │ └── file_utils.py │ │ ├── cuva │ │ │ ├── cuva.yaml │ │ │ └── _default_template_yaml │ │ ├── multilingual-llava-bench-in-the-wild │ │ │ ├── README.md │ │ │ ├── urdu_llava_in_the_wild.yaml │ │ │ ├── arabic_llava_in_the_wild.yaml │ │ │ ├── french_llava_in_the_wild.yaml │ │ │ ├── hindi_llava_in_the_wild.yaml │ │ │ ├── spanish_llava_in_the_wild.yaml │ │ │ ├── bengali_llava_in_the_wild.yaml │ │ │ ├── chinese_llava_in_the_wild.yaml │ │ │ ├── russian_llava_in_the_wild.yaml │ │ │ └── japanese_llava_in_the_wild.yaml │ │ ├── funqa │ │ │ ├── funqa.yaml │ │ │ └── _default_template_yaml │ │ ├── mmt │ │ │ ├── mmt.yaml │ │ │ ├── mmt_mi.yaml │ │ │ └── _default_template_yaml │ │ ├── ok_vqa │ │ │ ├── _ok_vqa.yaml │ │ │ └── ok_vqa_val2014.yaml │ │ ├── flickr30k │ │ │ └── flickr30k.yaml │ │ ├── mmmu │ │ │ ├── mmmu.yaml │ │ │ ├── mmmu_group_img.yaml │ │ │ ├── arial.ttf │ │ │ └── _default_template_yaml │ │ ├── vqav2 │ │ │ └── _vqav2.yaml │ │ ├── cmmmu │ │ │ ├── _cmmmu.yaml │ │ │ └── _default_template_cmmmu_yaml │ │ ├── docvqa │ │ │ ├── docvqa.yaml │ │ │ ├── docvqa_val.yaml │ │ │ └── docvqa_test.yaml │ │ ├── mmau │ │ │ ├── mmau.yaml │ │ │ ├── mmau_test_mini.yaml │ │ │ └── mmau_test.yaml │ │ ├── iconqa │ │ │ ├── iconqa.yaml │ │ │ ├── iconqa_test.yaml │ │ │ └── iconqa_val.yaml │ │ ├── mmsearch │ │ │ └── retrieve_content │ │ │ │ └── tokenization │ │ │ │ 
└── __init__.py │ │ ├── nocaps │ │ │ ├── nocaps.yaml │ │ │ └── _default_template_nocaps_yaml │ │ ├── textvqa │ │ │ ├── _textvqa.yaml │ │ │ └── textvqa_test.yaml │ │ ├── websrc │ │ │ └── websrc.yaml │ │ ├── fleurs │ │ │ ├── fleurs.yaml │ │ │ ├── fleurs_en.yaml │ │ │ ├── fleurs_cmn_hans_cn.yaml │ │ │ └── fleurs_yue_hant_hk.yaml │ │ ├── infovqa │ │ │ ├── infovqa.yaml │ │ │ ├── infovqa_val.yaml │ │ │ └── infovqa_test.yaml │ │ ├── synthdog │ │ │ └── synthdog.yaml │ │ ├── covost2 │ │ │ ├── covost2.yaml │ │ │ ├── covost2_en_zh.yaml │ │ │ ├── covost2_zh_en.yaml │ │ │ ├── covost2_zh_en_dev.yaml │ │ │ └── covost2_zh_en_test.yaml │ │ ├── textcaps │ │ │ ├── textcaps.yaml │ │ │ └── _default_template_textcaps_yaml │ │ ├── worldqa │ │ │ ├── worldqa.yaml │ │ │ └── _default_template_yaml │ │ ├── vizwiz_vqa │ │ │ └── _vizwiz_vqa.yaml │ │ ├── pope │ │ │ └── pope_full.yaml │ │ ├── scienceqa │ │ │ └── scienceqa_full.yaml │ │ ├── arc │ │ │ └── arc_challenge.yaml │ │ ├── clotho_aqa │ │ │ ├── clotho_aqa.yaml │ │ │ └── _default_template_yaml │ │ ├── coco_cap │ │ │ ├── coco2014_cap.yaml │ │ │ ├── coco2017_cap.yaml │ │ │ ├── coco_karpathy.yaml │ │ │ └── coco_cap.yaml │ │ ├── gigaspeech │ │ │ └── gigaspeech.yaml │ │ ├── multidocvqa │ │ │ └── multidocvqa.yaml │ │ ├── screenspot │ │ │ ├── _screenspot.yaml │ │ │ ├── screenspot_rec_test.yaml │ │ │ └── screenspot_reg_test.yaml │ │ ├── vstar_bench │ │ │ ├── __init__.py │ │ │ ├── vstar_bench.yaml │ │ │ ├── vstar_bench_direct_attributes.yaml │ │ │ └── vstar_bench_relative_position.yaml │ │ ├── megabench │ │ │ ├── megabench_core.yaml │ │ │ ├── megabench_open.yaml │ │ │ ├── megabench_core_si.yaml │ │ │ ├── megabench_open_si.yaml │ │ │ ├── megabench.yaml │ │ │ ├── metrics │ │ │ │ ├── parsing │ │ │ │ │ └── dummy_parse.py │ │ │ │ ├── __init__.py │ │ │ │ ├── aggregation │ │ │ │ │ └── unsupported_agg.py │ │ │ │ └── scoring │ │ │ │ │ └── unsupported_scoring.py │ │ │ └── requirements.txt │ │ ├── nextqa │ │ │ ├── nextqa.yaml │ │ │ └── _default_template_yaml │ 
│ ├── qbench │ │ │ └── qbenchs_dev.yaml │ │ ├── wild_vision_bench │ │ │ └── wildvision_bench.yaml │ │ ├── capability │ │ │ ├── capability_OCR.yaml │ │ │ ├── capability_action.yaml │ │ │ ├── capability_event.yaml │ │ │ ├── capability_scene.yaml │ │ │ ├── capability_style.yaml │ │ │ ├── capability_camera_angle.yaml │ │ │ ├── capability_object_color.yaml │ │ │ ├── capability_object_number.yaml │ │ │ ├── capability_camera_movement.yaml │ │ │ ├── capability_object_category.yaml │ │ │ ├── capability_spatial_relation.yaml │ │ │ ├── capability_dynamic_object_number.yaml │ │ │ └── capability_character_identification.yaml │ │ ├── internal_eval │ │ │ ├── internal_eval.yaml │ │ │ └── _default_template_internal_eval_yaml │ │ ├── live_bench │ │ │ ├── live_bench_2406.yaml │ │ │ ├── live_bench_2407.yaml │ │ │ ├── live_bench_2409.yaml │ │ │ └── live_bench.yaml │ │ ├── livexiv_tqa │ │ │ ├── livexiv_tqa_v3.yaml │ │ │ ├── livexiv_tqa_v4.yaml │ │ │ ├── livexiv_tqa_v5.yaml │ │ │ ├── livexiv_tqa_v6.yaml │ │ │ ├── livexiv_tqa_v1.yaml │ │ │ ├── livexiv_tqa_v2.yaml │ │ │ └── livexiv_tqa.yaml │ │ ├── livexiv_vqa │ │ │ ├── livexiv_vqa_v3.yaml │ │ │ ├── livexiv_vqa_v4.yaml │ │ │ ├── livexiv_vqa_v5.yaml │ │ │ ├── livexiv_vqa_v6.yaml │ │ │ ├── livexiv_vqa_v1.yaml │ │ │ ├── livexiv_vqa_v2.yaml │ │ │ └── livexiv_vqa.yaml │ │ ├── mix_evals │ │ │ ├── audio2text │ │ │ │ ├── mix_evals_audio2text.yaml │ │ │ │ ├── mix_evals_audio2text_hard.yaml │ │ │ │ └── _default_template_yaml │ │ │ ├── image2text │ │ │ │ ├── mix_evals_image2text.yaml │ │ │ │ └── mix_evals_image2text_hard.yaml │ │ │ └── video2text │ │ │ │ ├── mix_evals_video2text.yaml │ │ │ │ └── mix_evals_video2text_hard.yaml │ │ ├── open_asr │ │ │ ├── openasr_ami.yaml │ │ │ ├── openasr_tedlium.yaml │ │ │ ├── openasr_voxpopuli.yaml │ │ │ ├── openasr_earnings22.yaml │ │ │ ├── openasr_gigaspeech.yaml │ │ │ ├── openasr_librispeech.yaml │ │ │ ├── openasr_spgispeech.yaml │ │ │ ├── openasr_common_voice.yaml │ │ │ ├── openasr_librispeech_test_clean.yaml │ │ 
│ ├── openasr_librispeech_test_other.yaml │ │ │ └── openasr.yaml │ │ ├── jmmmu │ │ │ ├── jmmmu_math.yaml │ │ │ ├── jmmmu_music.yaml │ │ │ ├── jmmmu_biology.yaml │ │ │ ├── jmmmu_design.yaml │ │ │ ├── jmmmu_manage.yaml │ │ │ ├── jmmmu_physics.yaml │ │ │ ├── jmmmu_chemistry.yaml │ │ │ ├── jmmmu_economics.yaml │ │ │ ├── jmmmu_finance.yaml │ │ │ ├── jmmmu_marketing.yaml │ │ │ ├── jmmmu_materials.yaml │ │ │ ├── jmmmu_pharmacy.yaml │ │ │ ├── jmmmu_accounting.yaml │ │ │ ├── jmmmu_psychology.yaml │ │ │ ├── jmmmu_agriculture.yaml │ │ │ ├── jmmmu_electronics.yaml │ │ │ ├── jmmmu_japanese_art.yaml │ │ │ ├── jmmmu_public_health.yaml │ │ │ ├── jmmmu_world_history.yaml │ │ │ ├── jmmmu_clinical_medicine.yaml │ │ │ ├── jmmmu_computer_science.yaml │ │ │ ├── jmmmu_energy_and_power.yaml │ │ │ ├── jmmmu_japanese_heritage.yaml │ │ │ ├── jmmmu_japanese_history.yaml │ │ │ ├── jmmmu_basic_medical_science.yaml │ │ │ ├── jmmmu_mechanical_engineering.yaml │ │ │ ├── jmmmu_architecture_and_engineering.yaml │ │ │ └── jmmmu_diagnostics_and_laboratory_medicine.yaml │ │ ├── detailcaps │ │ │ └── _default_template_detailcaps_yaml │ │ ├── refcoco │ │ │ ├── refcoco_seg_val.yaml │ │ │ ├── refcoco_bbox_val.yaml │ │ │ ├── refcoco_seg_test.yaml │ │ │ ├── refcoco_bbox_test.yaml │ │ │ ├── refcoco_bbox_testA.yaml │ │ │ ├── refcoco_bbox_testB.yaml │ │ │ ├── refcoco_seg_testA.yaml │ │ │ ├── refcoco_seg_testB.yaml │ │ │ ├── refcoco_bbox_rec_val.yaml │ │ │ ├── refcoco_bbox_rec_test.yaml │ │ │ ├── refcoco_bbox_rec_testA.yaml │ │ │ ├── refcoco_bbox_rec_testB.yaml │ │ │ └── _refcoco.yaml │ │ ├── refcoco+ │ │ │ ├── refcoco+_seg_val.yaml │ │ │ ├── refcoco+_bbox_val.yaml │ │ │ ├── refcoco+_seg_testA.yaml │ │ │ ├── refcoco+_seg_testB.yaml │ │ │ ├── refcoco+_bbox_testA.yaml │ │ │ ├── refcoco+_bbox_testB.yaml │ │ │ ├── refcoco+_bbox_rec_val.yaml │ │ │ ├── refcoco+_bbox_rec_testA.yaml │ │ │ ├── refcoco+_bbox_rec_testB.yaml │ │ │ └── _refcoco.yaml │ │ ├── refcocog │ │ │ ├── refcocog_seg_val.yaml │ │ │ ├── 
refcocog_bbox_val.yaml │ │ │ ├── refcocog_seg_test.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── refcocog_bbox_test.yaml │ │ │ ├── refcocog_bbox_rec_val.yaml │ │ │ └── refcocog_bbox_rec_test.yaml │ │ ├── common_voice_15 │ │ │ ├── common_voice_15.yaml │ │ │ ├── common_voice_15_en.yaml │ │ │ ├── common_voice_15_fr.yaml │ │ │ └── common_voice_15_zh-CN.yaml │ │ ├── librispeech │ │ │ ├── librispeech_long.yaml │ │ │ ├── librispeech.yaml │ │ │ ├── librispeech_test_clean_long.yaml │ │ │ └── librispeech_test_other_long.yaml │ │ ├── mmmu_pro │ │ │ ├── _default_template_yaml │ │ │ ├── mmmu_pro_cot.yaml │ │ │ └── mmmu_pro.yaml │ │ ├── videommmu │ │ │ └── video_mmmu.yaml │ │ ├── videochatgpt │ │ │ ├── _videochatgpt.yaml │ │ │ └── _default_template_yaml │ │ ├── vlmsareblind │ │ │ └── __init__.py │ │ ├── youcook2 │ │ │ └── _default_template_yaml │ │ ├── gpqa │ │ │ ├── n_shot │ │ │ │ ├── gpqa_main_n_shot.yaml │ │ │ │ ├── gpqa_diamond_n_shot.yaml │ │ │ │ └── gpqa_extended_n_shot.yaml │ │ │ ├── zeroshot │ │ │ │ ├── gpqa_main_zeroshot.yaml │ │ │ │ ├── gpqa_diamond_zeroshot.yaml │ │ │ │ └── gpqa_extended_zeroshot.yaml │ │ │ ├── cot_n_shot │ │ │ │ ├── gpqa_main_cot_n_shot.yaml │ │ │ │ ├── gpqa_diamond_cot_n_shot.yaml │ │ │ │ └── gpqa_extended_cot_n_shot.yaml │ │ │ ├── cot_zeroshot │ │ │ │ ├── gpqa_main_cot_zeroshot.yaml │ │ │ │ ├── gpqa_diamond_cot_zeroshot.yaml │ │ │ │ └── gpqa_extended_cot_zeroshot.yaml │ │ │ └── generative │ │ │ │ ├── gpqa_main_generative_n_shot.yaml │ │ │ │ ├── gpqa_diamond_generative_n_shot.yaml │ │ │ │ └── gpqa_extended_generative_n_shot.yaml │ │ ├── illusionvqa │ │ │ ├── illusionvqa_comprehension.yaml │ │ │ └── illusionvqa_soft_localization.yaml │ │ ├── olympiadbench │ │ │ └── olympiadbench.yaml │ │ ├── temporalbench │ │ │ └── temporalbench.yaml │ │ ├── hrbench │ │ │ └── hrbench.yaml │ │ ├── air_bench │ │ │ ├── _default_template_yaml │ │ │ ├── air_bench_chat.yaml │ │ │ └── air_bench_foundation.yaml │ │ ├── tempcompass │ │ │ └── _tempcompass.yaml │ │ ├── egothink │ │ │ 
└── _default_template_yaml │ │ ├── llava_interleave_bench │ │ │ ├── interleave_bench.yaml │ │ │ └── _default_template_interleave_yaml │ │ ├── mathvista │ │ │ ├── mathvista.yaml │ │ │ └── mathvista_testmini.yaml │ │ ├── vitatecs │ │ │ ├── _vitatecs.yaml │ │ │ └── _default_template_yaml │ │ ├── mmlu │ │ │ ├── default │ │ │ │ ├── _mmlu_stem.yaml │ │ │ │ ├── _mmlu_other.yaml │ │ │ │ ├── _mmlu_humanities.yaml │ │ │ │ ├── _mmlu.yaml │ │ │ │ ├── _mmlu_social_sciences.yaml │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ ├── 
mmlu_electrical_engineering.yaml │ │ │ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ └── mmlu_professional_accounting.yaml │ │ │ ├── continuation │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ ├── mmlu_professional_medicine.yaml │ │ │ │ ├── mmlu_electrical_engineering.yaml │ │ 
│ │ ├── mmlu_elementary_mathematics.yaml │ │ │ │ ├── mmlu_high_school_mathematics.yaml │ │ │ │ ├── mmlu_high_school_statistics.yaml │ │ │ │ ├── mmlu_college_computer_science.yaml │ │ │ │ ├── mmlu_high_school_geography.yaml │ │ │ │ ├── mmlu_high_school_us_history.yaml │ │ │ │ ├── mmlu_professional_accounting.yaml │ │ │ │ ├── mmlu_high_school_psychology.yaml │ │ │ │ ├── mmlu_high_school_world_history.yaml │ │ │ │ ├── mmlu_professional_psychology.yaml │ │ │ │ ├── mmlu_high_school_computer_science.yaml │ │ │ │ ├── mmlu_high_school_macroeconomics.yaml │ │ │ │ ├── mmlu_high_school_microeconomics.yaml │ │ │ │ └── mmlu_high_school_european_history.yaml │ │ │ ├── generative │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ 
│ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ └── mmlu_us_foreign_policy.yaml │ │ │ ├── flan_cot_zeroshot │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ ├── mmlu_world_religions.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_human_sexuality.yaml │ │ │ │ ├── mmlu_professional_law.yaml │ │ │ │ ├── mmlu_clinical_knowledge.yaml │ │ │ │ ├── mmlu_college_mathematics.yaml │ │ │ │ ├── mmlu_conceptual_physics.yaml │ │ │ │ ├── mmlu_high_school_biology.yaml │ │ │ │ ├── mmlu_high_school_physics.yaml │ │ │ │ ├── mmlu_international_law.yaml │ │ │ │ ├── mmlu_logical_fallacies.yaml │ │ │ │ ├── mmlu_public_relations.yaml │ │ │ │ ├── mmlu_security_studies.yaml │ │ │ │ ├── mmlu_us_foreign_policy.yaml │ │ │ │ ├── mmlu_high_school_chemistry.yaml │ │ │ │ └── mmlu_professional_medicine.yaml │ │ │ └── flan_n_shot │ │ │ │ ├── generative │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── 
mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_miscellaneous.yaml │ │ │ │ ├── mmlu_econometrics.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ ├── mmlu_jurisprudence.yaml │ │ │ │ ├── mmlu_business_ethics.yaml │ │ │ │ ├── mmlu_college_biology.yaml │ │ │ │ ├── mmlu_college_physics.yaml │ │ │ │ ├── mmlu_moral_disputes.yaml │ │ │ │ ├── mmlu_abstract_algebra.yaml │ │ │ │ ├── mmlu_college_chemistry.yaml │ │ │ │ ├── mmlu_college_medicine.yaml │ │ │ │ ├── mmlu_computer_security.yaml │ │ │ │ ├── mmlu_machine_learning.yaml │ │ │ │ ├── mmlu_medical_genetics.yaml │ │ │ │ ├── mmlu_moral_scenarios.yaml │ │ │ │ └── mmlu_world_religions.yaml │ │ │ │ └── loglikelihood │ │ │ │ ├── mmlu_anatomy.yaml │ │ │ │ ├── mmlu_astronomy.yaml │ │ │ │ ├── mmlu_marketing.yaml │ │ │ │ ├── mmlu_nutrition.yaml │ │ │ │ ├── mmlu_virology.yaml │ │ │ │ ├── mmlu_management.yaml │ │ │ │ ├── mmlu_human_aging.yaml │ │ │ │ ├── mmlu_philosophy.yaml │ │ │ │ ├── mmlu_prehistory.yaml │ │ │ │ ├── mmlu_sociology.yaml │ │ │ │ ├── mmlu_global_facts.yaml │ │ │ │ ├── mmlu_formal_logic.yaml │ │ │ │ └── mmlu_miscellaneous.yaml │ │ ├── mmupd │ │ │ ├── mmupd_option.yaml │ │ │ ├── mmupd_base.yaml │ │ │ ├── mmupd_instruction.yaml │ │ │ ├── mmupd.yaml │ │ │ ├── mmaad_base.yaml │ │ │ ├── mmiasd_base.yaml │ │ │ └── mmivqd_base.yaml │ │ ├── mmbench │ │ │ ├── mmbench_cn.yaml │ │ │ ├── mmbench_en.yaml │ │ │ ├── mmbench_cn_test.yaml │ │ │ ├── mmbench_en_test.yaml │ │ │ └── mmbench.yaml │ │ ├── vdc │ │ │ └── _default_template_yaml │ │ ├── egoschema │ │ │ └── _default_template_yaml │ │ ├── perceptiontest │ │ │ ├── test │ │ │ │ └── _default_template_yaml │ │ │ └── val │ │ │ │ └── _default_template_yaml │ │ ├── mvbench │ │ │ ├── mvbench_action_count.yaml │ │ │ ├── mvbench_moving_count.yaml │ │ │ ├── mvbench_state_change.yaml │ │ │ ├── mvbench_action_antonym.yaml │ │ │ ├── mvbench_object_shuffle.yaml │ │ │ ├── mvbench_action_sequence.yaml │ │ │ ├── mvbench_character_order.yaml │ 
│ │ ├── mvbench_action_prediction.yaml │ │ │ ├── mvbench_fine_grained_pose.yaml │ │ │ ├── mvbench_moving_attribute.yaml │ │ │ ├── mvbench_moving_direction.yaml │ │ │ ├── mvbench_object_existence.yaml │ │ │ ├── mvbench_scene_transition.yaml │ │ │ ├── mvbench_unexpected_action.yaml │ │ │ ├── mvbench_object_interaction.yaml │ │ │ ├── mvbench_action_localization.yaml │ │ │ ├── mvbench_fine_grained_action.yaml │ │ │ ├── mvbench_egocentric_navigation.yaml │ │ │ └── mvbench_counterfactual_inference.yaml │ │ ├── cvrr │ │ │ └── _default_template_yaml │ │ ├── mathverse │ │ │ ├── mathverse_testmini_text.yaml │ │ │ └── mathverse_testmini_vision.yaml │ │ ├── mmlu_pro │ │ │ ├── mmlu_pro_law.yaml │ │ │ ├── mmlu_pro_math.yaml │ │ │ └── mmlu_pro_other.yaml │ │ └── video_detail_description │ │ │ └── _default_template_yaml │ └── loggers │ │ └── __init__.py ├── miscs │ ├── llava_result_check.md │ ├── llava_sglang_result_check.md │ ├── test_scienceqa.py │ └── example_eval.yaml ├── tools │ ├── live_bench │ │ ├── live_bench │ │ │ ├── data_generator │ │ │ │ ├── utils │ │ │ │ │ └── __init__.py │ │ │ │ ├── example │ │ │ │ │ └── example_website.png │ │ │ │ └── __init__.py │ │ │ ├── driver │ │ │ │ ├── .gitignore │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ ├── websites │ │ │ │ └── __init__.py │ │ │ └── screen_shoter │ │ │ │ └── __init__.py │ │ └── setup.py │ └── lite │ │ ├── embedder │ │ └── __init__.py │ │ └── shrinker │ │ └── __init__.py ├── setup.py ├── .github │ └── issue_template.md └── examples │ └── models │ ├── aria.sh │ └── xai_grok.sh ├── qwen-vl-utils ├── .python-version └── src │ └── qwen_vl_utils │ └── __init__.py └── docs ├── logo.png └── framework.png /llava-ov-15/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llava-ov-15/src/serve/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /llava-ov-15/src/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/miscs/llava_result_check.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/caching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /qwen-vl-utils/.python-version: -------------------------------------------------------------------------------- 1 | 3.8.19 2 | -------------------------------------------------------------------------------- /lmms-eval/miscs/llava_sglang_result_check.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/model_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ocrbench_v2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/models/video_chatgpt/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/_task_utils/gpt_eval_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/driver/.gitignore: -------------------------------------------------------------------------------- 1 | extensions/ 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cuva/cuva.yaml: -------------------------------------------------------------------------------- 1 | group : cuva 2 | task: 3 | - cuva_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/funqa/funqa.yaml: -------------------------------------------------------------------------------- 1 | group : funqa 2 | task: 3 | - funqa_test -------------------------------------------------------------------------------- /docs/logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/docs/logo.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/mmt.yaml: -------------------------------------------------------------------------------- 1 | group: mmt 2 | task: 3 | - mmt_val 4 | - mmt_test -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup() 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: 3 | - ok_vqa_val2014 -------------------------------------------------------------------------------- /docs/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/docs/framework.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/flickr30k/flickr30k.yaml: -------------------------------------------------------------------------------- 1 | group: flickr30k 2 | task: 3 | - flickr30k_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu 2 | task: 3 | - mmmu_val 4 | - mmmu_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vqav2/_vqav2.yaml: -------------------------------------------------------------------------------- 1 | group: vqav2 2 | task: 3 | - vqav2_val 4 | - vqav2_test 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cmmmu/_cmmmu.yaml: -------------------------------------------------------------------------------- 1 | group: cmmmu 2 | task: 3 | - cmmmu_val 4 | - cmmmu_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa.yaml: -------------------------------------------------------------------------------- 1 | group: docvqa 2 | task: 3 | - docvqa_val 4 | - docvqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau.yaml: -------------------------------------------------------------------------------- 1 | group: mmau 2 | task: 3 | - mmau_test_mini 4 | - mmau_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/mmt_mi.yaml: -------------------------------------------------------------------------------- 1 | group: mmt_mi 2 | task: 3 | - mmt_mi_val 4 | - mmt_mi_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/video_chatgpt/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import VideoChatGPTLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa.yaml: -------------------------------------------------------------------------------- 1 | group: iconqa 2 | task: 3 | - iconqa_val 4 | - iconqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | # Implement your code here. 
2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nocaps/nocaps.yaml: -------------------------------------------------------------------------------- 1 | group : nocaps 2 | task: 3 | - nocaps_test 4 | - nocaps_val -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textvqa/_textvqa.yaml: -------------------------------------------------------------------------------- 1 | group: textvqa 2 | task: 3 | - textvqa_val 4 | - textvqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/websrc/websrc.yaml: -------------------------------------------------------------------------------- 1 | group: websrc 2 | task: 3 | - websrc_val 4 | - websrc_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs.yaml: -------------------------------------------------------------------------------- 1 | group: fleurs 2 | task: 3 | - fleurs_en 4 | - fleurs_cmn_hans_cn -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/infovqa/infovqa.yaml: -------------------------------------------------------------------------------- 1 | group: infovqa 2 | task: 3 | - infovqa_val 4 | - infovqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/synthdog/synthdog.yaml: -------------------------------------------------------------------------------- 1 | group: synthdog 2 | task: 3 | - synthdog_en 4 | - synthdog_zh -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2.yaml: -------------------------------------------------------------------------------- 1 | group: covost2 2 | task: 3 | - covost2_en_zh 4 | - covost2_zh_en 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textcaps/textcaps.yaml: -------------------------------------------------------------------------------- 1 | group : textcaps 2 | task: 3 | - textcaps_val 4 | - textcaps_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/worldqa/worldqa.yaml: -------------------------------------------------------------------------------- 1 | group: worldqa 2 | task: 3 | - worldqa_gen 4 | - worldqa_mc 5 | 6 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/driver/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.driver.load_driver import load_driver 2 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_en.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: en_us 2 | include: _default_template_yaml 3 | task: fleurs_en -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: 3 | - vizwiz_vqa_val 4 | - vizwiz_vqa_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/pope/pope_full.yaml: -------------------------------------------------------------------------------- 1 | group : pope_full 2 | task: 3 | - pope_adv 4 | - pope_pop 5 | - pope_random -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/scienceqa/scienceqa_full.yaml: -------------------------------------------------------------------------------- 1 | group: scienceqa_full 2 | task: 3 | - scienceqa 4 | 
- scienceqa_img -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/arc/arc_challenge.yaml: -------------------------------------------------------------------------------- 1 | include: arc_easy.yaml 2 | task: arc_challenge 3 | dataset_name: ARC-Challenge 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/clotho_aqa/clotho_aqa.yaml: -------------------------------------------------------------------------------- 1 | group: clotho_aqa 2 | task: 3 | - clotho_aqa_val 4 | - clotho_aqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco2014_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2014_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco2017_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2017_cap 2 | task: 3 | - coco2017_cap_val 4 | - coco2017_cap_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gigaspeech/gigaspeech.yaml: -------------------------------------------------------------------------------- 1 | group: gigaspeech 2 | task: 3 | - gigaspeech_dev 4 | - gigaspeech_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_test" 2 | test_split: test 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/iconqa/iconqa_val.yaml: 
-------------------------------------------------------------------------------- 1 | task: "iconqa_val" 2 | test_split: val 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multidocvqa/multidocvqa.yaml: -------------------------------------------------------------------------------- 1 | group: multidocvqa 2 | task: 3 | - multidocvqa_val 4 | - multidocvqa_test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/_screenspot.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot 2 | task: 3 | - screenspot_reg_test 4 | - screenspot_rec_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/__init__.py: -------------------------------------------------------------------------------- 1 | # V* Benchmark: Guided Visual Search as a Core Mechanism in Multimodal LLMs 2 | -------------------------------------------------------------------------------- /lmms-eval/tools/lite/embedder/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseEmbedder import BaseEmbedder 2 | from .ClipBgeEmbedder import ClipBgeEmbedder 3 | -------------------------------------------------------------------------------- /lmms-eval/tools/lite/shrinker/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseShrinker import BaseShrinker 2 | from .EmbedShrinker import Embed_Shrinker 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation_tracker import EvaluationTracker 2 | from .wandb_logger import WandbLogger 3 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco_karpathy.yaml: -------------------------------------------------------------------------------- 1 | group : coco_karpathy 2 | task: 3 | - coco_karpathy_val 4 | - coco_karpathy_test -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_en_zh.yaml: -------------------------------------------------------------------------------- 1 | group: covost2_en_zh 2 | task: 3 | - covost2_en_zh_test 4 | - covost2_en_zh_dev -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en.yaml: -------------------------------------------------------------------------------- 1 | group: covost2_zh_en 2 | task: 3 | - covost2_zh_en_test 4 | - covost2_zh_en_dev -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_core.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: core 2 | task: "megabench_core" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_open.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: open 2 | task: "megabench_open" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/mmmu_group_img.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_group_img 2 | task: 3 | - mmmu_val_group_img 4 | - mmmu_test_group_img 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nextqa/nextqa.yaml: 
-------------------------------------------------------------------------------- 1 | group: nextqa 2 | task: 3 | - nextqa_oe_test 4 | - nextqa_oe_val 5 | - nextqa_mc_test 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml: -------------------------------------------------------------------------------- 1 | task: ok_vqa_val2014 2 | test_split: val2014 3 | include: _default_template_vqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/qbench/qbenchs_dev.yaml: -------------------------------------------------------------------------------- 1 | group: qbenchs_dev 2 | task: 3 | - qbench_dev 4 | - qbench2_dev 5 | - abench_dev 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/arial.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/lmms-eval/lmms_eval/tasks/mmmu/arial.ttf -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/wild_vision_bench/wildvision_bench.yaml: -------------------------------------------------------------------------------- 1 | group: wildvision 2 | task: 3 | - wildvision_0617 4 | - wildvision_0630 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_OCR.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_OCR 4 | dataset_name: OCR 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "covost2_zh_en_dev" 2 | include: 
_default_template_zh_en_yaml 3 | test_split: dev 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_cmn_hans_cn.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: cmn_hans_cn 2 | task: fleurs_cmn_hans_cn 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/fleurs/fleurs_yue_hant_hk.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: yue_hant_hk 2 | include: _default_template_yaml 3 | task: fleurs_yue_hant_hk -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_action 4 | dataset_name: action 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_event.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_event 4 | dataset_name: event 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_scene.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_scene 4 | dataset_name: scene 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_style.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_style 4 | dataset_name: style 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/covost2/covost2_zh_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "covost2_zh_en_test" 2 | include: _default_template_zh_en_yaml 3 | test_split: test 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/internal_eval/internal_eval.yaml: -------------------------------------------------------------------------------- 1 | group: internal_eval 2 | task: 3 | - d170_cn 4 | - d170_en 5 | - dc100_en 6 | - dc200_cn 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2406.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2406" 2 | dataset_name: 2024-06 3 | include: live_bench_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2407.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2407" 2 | dataset_name: 2024-07 3 | include: live_bench_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench_2409.yaml: -------------------------------------------------------------------------------- 1 | task: "live_bench_2409" 2 | dataset_name: 2024-09 3 | include: live_bench_template_yaml_v2 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v3.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v3" 2 | dataset_name: "v3-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v4.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v4" 2 | dataset_name: "v4-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v5.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v5" 2 | dataset_name: "v5-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v6.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v6" 2 | dataset_name: "v6-TQA" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v3.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v3" 2 | dataset_name: "v3-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v4.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v4" 2 | dataset_name: "v4-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v5.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v5" 2 | dataset_name: "v5-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v6.yaml: 
-------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v6" 2 | dataset_name: "v6-VQA" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_audio2text 2 | task: 3 | - mix_evals_audio2_text_freeform 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_ami.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: ami 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_ami -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_math.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Math 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_math" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v1.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v1" 2 | dataset_name: "TQA-2024-09-21" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v2.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_tqa_v2" 2 | dataset_name: "TQA-2024-10-26" 3 | include: livexiv_tqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v1.yaml: 
-------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v1" 2 | dataset_name: "VQA-2024-09-21" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v2.yaml: -------------------------------------------------------------------------------- 1 | task: "livexiv_vqa_v2" 2 | dataset_name: "VQA-2024-10-26" 3 | include: livexiv_vqa_template_yaml 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_core_si.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: core_single_image 2 | task: "megabench_core_si" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench_open_si.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: open_single_image 2 | task: "megabench_open_si" 3 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/detailcaps/_default_template_detailcaps_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: "Describe this image in detail." 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_music.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Music 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_music" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_tedlium.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: tedlium 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_tedlium -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_val 3 | test_split: val 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_biology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Biology 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_biology" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_design.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Design 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_design" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_manage.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Manage 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_manage" 4 | include: _default_template_yaml 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_physics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Physics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_physics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/megabench.yaml: -------------------------------------------------------------------------------- 1 | group: megabench 2 | task: 3 | - megabench_core 4 | - megabench_open 5 | - megabench_core_si 6 | - megabench_open_si -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_audio2text_hard 2 | task: 3 | - mix_evals_audio2_text_freeform_hard 4 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu/_default_template_yaml: -------------------------------------------------------------------------------- 1 | generation_kwargs: 2 | max_new_tokens: 16 3 | 4 | metadata: 5 | version: 0.0 6 | interleaved_format: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_voxpopuli.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: voxpopuli 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_voxpopuli -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_val 3 | include: _default_template_seg_yaml 4 | 
test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_val 3 | test_split: val 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_test 3 | test_split: test 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_val 3 | include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_camera_angle.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_camera_angle 4 | dataset_name: camera_angle 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_color.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_color 4 | dataset_name: object_color 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15.yaml: -------------------------------------------------------------------------------- 1 | group: common_voice_15 2 | task: 3 | - common_voice_15_zh-CN 4 
| - common_voice_15_en 5 | - common_voice_15_fr -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_chemistry.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Chemistry 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_chemistry" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_economics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Economics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_economics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_finance.yaml: -------------------------------------------------------------------------------- 1 | 2 | dataset_name: Finance 3 | tag: "jmmmu_culture_agnostic" 4 | task: "jmmmu_finance" 5 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_marketing.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Marketing 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_marketing" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_materials.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Materials 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_materials" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_pharmacy.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: 
Pharmacy 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_pharmacy" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_long.yaml: -------------------------------------------------------------------------------- 1 | group: librispeech_long 2 | task: 3 | - librispeech_test_clean_long 4 | - librispeech_test_other_long 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/_default_template_yaml: -------------------------------------------------------------------------------- 1 | generation_kwargs: 2 | max_new_tokens: 256 3 | 4 | metadata: 5 | version: 0.0 6 | interleaved_format: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/mmmu_pro_cot.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_pro_cot 2 | task: 3 | - mmmu_pro_vision_cot 4 | - mmmu_pro_composite_cot 5 | - mmmu_pro_original_cot 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_earnings22.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: earnings22 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_earnings22 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_gigaspeech.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: gigaspeech 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_gigaspeech -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech.yaml: 
-------------------------------------------------------------------------------- 1 | group: openasr_librispeech 2 | task: 3 | - open_asr_librispeech_test_other 4 | - open_asr_librispeech_test_clean -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_spgispeech.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: spgispeech 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_spgispeech -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testA 3 | include: _default_template_seg_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testB 3 | include: _default_template_seg_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_test 3 | test_split: test 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testA 3 | test_split: testA 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testB 3 | test_split: testB 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testA 3 | test_split: testA 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testB 3 | test_split: testB 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_test 3 | include: _default_template_seg_yaml 4 | test_split: test 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/videommmu/video_mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: video_mmmu 2 | task: 3 | - video_mmmu_adaptation 4 | - video_mmmu_comprehension 5 | - video_mmmu_perception 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_number.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_number 4 | dataset_name: object_number 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_accounting.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Accounting 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_accounting" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_psychology.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Psychology 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_psychology" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_common_voice.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: common_voice 2 | test_split: test 3 | include: _default_template_yaml 4 | task: open_asr_common_voice -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testA 3 
| include: _default_template_bbox_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testB 3 | include: _default_template_bbox_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog 2 | task: 3 | - refcocog_seg_test 4 | - refcocog_seg_val 5 | - refcocog_bbox_test 6 | - refcocog_bbox_val 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_test 3 | include: _default_template_bbox_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/screenspot_rec_test.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot_rec 2 | task: screenspot_rec_test 3 | include: _default_template_rec_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/screenspot/screenspot_reg_test.yaml: -------------------------------------------------------------------------------- 1 | group: screenspot_reg 2 | task: screenspot_reg_test 3 | include: _default_template_reg_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml: 
-------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: Provide a one-sentence caption for the provided image. -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/__init__.py: -------------------------------------------------------------------------------- 1 | from .api.live_bench import generate_live_bench, generate_live_bench_from_path 2 | from .data_generator import LiveBench 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_camera_movement.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_camera_movement 4 | dataset_name: camera_movement 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_object_category.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_object_category 4 | dataset_name: object_category 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_spatial_relation.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_spatial_relation 4 | dataset_name: spatial_relation 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/coco_cap/coco_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test 5 | - coco2017_cap_val 6 | - coco2017_cap_test 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_agriculture.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Agriculture 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_agriculture" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_electronics.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Electronics 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_electronics" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_art.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_Art 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_art" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_image2text 2 | task: 3 | - mix_evals_image2text_mc 4 | - mix_evals_image2text_freeform 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_val 3 | test_split: val 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/videochatgpt/_videochatgpt.yaml: -------------------------------------------------------------------------------- 1 | group: videochatgpt 2 
| task: 3 | - videochatgpt_gen 4 | - videochatgpt_temporal 5 | - videochatgpt_consistency 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vlmsareblind/__init__.py: -------------------------------------------------------------------------------- 1 | # VLMs Are Blind benchmark task 2 | # Tests visual reasoning capabilities through path-counting in subway connection diagrams 3 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/youcook2/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/YouCook2 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: YouCookIIVideos 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_main_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml: -------------------------------------------------------------------------------- 1 | include: illusionvqa.yaml 2 | task: illusionvqa_comprehension 3 | dataset_path: csebuetnlp/illusionVQA-Comprehension -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_public_health.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Public_Health 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_public_health" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_world_history.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_name: World_History 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_world_history" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/olympiadbench/olympiadbench.yaml: -------------------------------------------------------------------------------- 1 | group: olympiadbench 2 | task: 3 | - olympiadbench_test_en 4 | - olympiadbench_test_cn 5 | metadata: 6 | - version: 0.0 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_val 3 | include: _default_template_bbox_rec_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_test 3 | test_split: test 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_rec_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox_rec 2 | task: refcocog_bbox_rec_val 3 | include: _default_template_bbox_rec_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/temporalbench/temporalbench.yaml: -------------------------------------------------------------------------------- 1 | group: temporalbench 2 | task: 3 | - temporalbench_short_qa 4 | - temporalbench_long_qa 5 | - temporalbench_short_caption 6 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_main_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_testA 3 | include: _default_template_bbox_rec_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox_rec 2 | task: refcoco+_bbox_rec_testB 3 | include: _default_template_bbox_rec_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_testA 3 | test_split: testA 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox_rec 2 | task: refcoco_bbox_rec_testB 3 | test_split: testB 4 | include: _default_template_bbox_rec_yaml 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_rec_test.yaml: 
-------------------------------------------------------------------------------- 1 | group: refcocog_bbox_rec 2 | task: refcocog_bbox_rec_test 3 | include: _default_template_bbox_rec_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_dynamic_object_number.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_dynamic_object_number 4 | dataset_name: dynamic_object_number 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_diamond_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_n_shot_yaml 4 | task: gpqa_extended_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml: -------------------------------------------------------------------------------- 1 | include: illusionvqa.yaml 2 | task: illusionvqa_soft_localization 3 | dataset_path: csebuetnlp/illusionVQA-Soft-Localization -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_clinical_medicine.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Clinical_Medicine 2 | tag: "jmmmu_culture_agnostic" 3 | task: 
"jmmmu_clinical_medicine" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_computer_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Computer_Science 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_computer_science" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_energy_and_power.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Energy_and_Power 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_energy_and_power" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_heritage.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_Heritage 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_heritage" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_japanese_history.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Japanese_History 2 | tag: "jmmmu_culture_specific" 3 | task: "jmmmu_japanese_history" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/websites/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.websites.load_website import load_websites, load_websites_from_file 2 | from live_bench.websites.website import Website 3 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/models/video_chatgpt/model/__init__.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.models.video_chatgpt.model.video_chatgpt import ( 2 | VideoChatGPTConfig, 3 | VideoChatGPTLlamaForCausalLM, 4 | ) 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_main_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_diamond_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/hrbench/hrbench.yaml: -------------------------------------------------------------------------------- 1 | group: hrbench 2 | task: 3 | - hrbench4k 4 | - hrbench8k 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-3.5-turbo" 8 | max_workers: 1 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech_test_clean.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: librispeech 2 | test_split: test.clean 3 | include: _default_template_yaml 4 | task: open_asr_librispeech_test_clean -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr_librispeech_test_other.yaml: -------------------------------------------------------------------------------- 1 | 
dataset_name: librispeech 2 | test_split: test.other 3 | include: _default_template_yaml 4 | task: open_asr_librispeech_test_other -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/AIR_Bench 2 | dataset_kwargs: 3 | token: True 4 | 5 | metadata: 6 | gpt_eval_model_name: gpt-4o 7 | version: 0.0 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/air_bench_chat.yaml: -------------------------------------------------------------------------------- 1 | group: air_bench_chat 2 | task: 3 | - air_bench_chat_sound 4 | - air_bench_chat_music 5 | - air_bench_chat_speech 6 | - air_bench_chat_mixed 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/capability/capability_character_identification.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | 3 | task: capability_character_identification 4 | dataset_name: character_identification 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_zeroshot_yaml 4 | task: gpqa_extended_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/internal_eval/_default_template_internal_eval_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | pre_prompt: "" 4 | post_prompt: "" 5 | process_results_use_image: true 6 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu_pro 2 | task: 3 | - mmmu_pro_vision 4 | # - mmmu_pro_composite # removing composite task in formal MMMU-Pro evaluation 5 | - mmmu_pro_standard 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/tempcompass/_tempcompass.yaml: -------------------------------------------------------------------------------- 1 | group: tempcompass 2 | task: 3 | - tempcompass_multi_choice 4 | - tempcompass_yes_no 5 | - tempcompass_caption_matching 6 | - tempcompass_captioning 7 | -------------------------------------------------------------------------------- /lmms-eval/miscs/test_scienceqa.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | dataset = load_dataset("Otter-AI/ScienceQA", trust_remote_code=True)["test"] 4 | for doc in dataset: 5 | print(doc["id"]) 6 | -------------------------------------------------------------------------------- /qwen-vl-utils/src/qwen_vl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .vision_process import ( 2 | extract_vision_info, 3 | fetch_image, 4 | fetch_video, 5 | process_vision_info, 6 | smart_resize, 7 | ) 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/air_bench/air_bench_foundation.yaml: -------------------------------------------------------------------------------- 1 | group: air_bench_foundation 2 | task: 3 | - air_bench_foundation_sound 4 | - air_bench_foundation_music 5 | - air_bench_foundation_speech 6 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_diamond_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_n_shot_yaml 4 | task: gpqa_extended_cot_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_main_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_basic_medical_science.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Basic_Medical_Science 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_basic_medical_science" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_mechanical_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Mechanical_Engineering 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_mechanical_engineering" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech.yaml: 
-------------------------------------------------------------------------------- 1 | group: librispeech 2 | task: 3 | - librispeech_dev_clean 4 | - librispeech_dev_other 5 | - librispeech_test_clean 6 | - librispeech_test_other 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/egothink/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: EgoLife-v1/Egothink 2 | dataset_kwargs: 3 | token: True 4 | test_split: test 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-4" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_diamond_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_video2text 2 | task: 3 | - mix_evals_video2text_mc 4 | - mix_evals_video2text_freeform 5 | # - mix_evals_video2text_openended -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_cot_zeroshot_yaml 4 | task: gpqa_extended_cot_zeroshot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml: 
-------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_main 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_main_generative_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/example/example_website.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JulietChoo/VisionSelector/HEAD/lmms-eval/tools/live_bench/live_bench/data_generator/example/example_website.png -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_tqa/livexiv_tqa.yaml: -------------------------------------------------------------------------------- 1 | group: livexiv_tqa 2 | task: 3 | - livexiv_tqa_v1 4 | - livexiv_tqa_v2 5 | - livexiv_tqa_v3 6 | - livexiv_tqa_v4 7 | - livexiv_tqa_v5 8 | - livexiv_tqa_v6 9 | 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/livexiv_vqa/livexiv_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: livexiv_vqa 2 | task: 3 | - livexiv_vqa_v1 4 | - livexiv_vqa_v2 5 | - livexiv_vqa_v3 6 | - livexiv_vqa_v4 7 | - livexiv_vqa_v5 8 | - livexiv_vqa_v6 9 | 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/llava_interleave_bench/interleave_bench.yaml: -------------------------------------------------------------------------------- 1 | group: llava_interleave_bench 2 | task: 3 | - llava_interleave_bench_in_domain 4 | - llava_interleave_bench_out_domain 5 | - llava_interleave_bench_multi_view -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco+/_refcoco.yaml: 
-------------------------------------------------------------------------------- 1 | group: refcoco+ 2 | task: 3 | - refcoco+_seg_val 4 | - refcoco+_seg_testA 5 | - refcoco+_seg_testB 6 | - refcoco+_bbox_val 7 | - refcoco+_bbox_testA 8 | - refcoco+_bbox_testB 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | metric_list: 3 | - metric: vstar_overall_acc 4 | aggregation: !function utils.vstar_aggregate_results 5 | higher_is_better: true -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_diamond 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_diamond_generative_n_shot 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_architecture_and_engineering.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Architecture_and_Engineering 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_architecture_and_engineering" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/live_bench/live_bench.yaml: -------------------------------------------------------------------------------- 1 | group: live_bench 2 | task: 3 | - live_bench_2406 4 | - live_bench_2407 5 | - live_bench_2409 6 | 7 | metadata: 8 | api_type: azure 9 | eval_with_mini: false 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathvista/mathvista.yaml: 
-------------------------------------------------------------------------------- 1 | group: mathvista 2 | task: 3 | - mathvista_testmini 4 | - mathvista_test 5 | metadata: 6 | version: 0.0 7 | gpt_eval_model_name: "gpt-3.5-turbo" 8 | quick_extract: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nextqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NExTQA 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: nextqa 6 | metadata: 7 | version: 0.0.1 8 | load_package: True 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml: -------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | prompt: "Provide a one-sentence caption for the provided image." 4 | plm: 5 | prompt: "Describe the image briefly." 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vitatecs/_vitatecs.yaml: -------------------------------------------------------------------------------- 1 | group: vitatecs 2 | task: 3 | - vitatecs_direction 4 | - vitatecs_intensity 5 | - vitatecs_sequence 6 | - vitatecs_compositionality 7 | - vitatecs_localization 8 | - vitatecs_type 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cuva/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: fesvhtr/CUVA_LMMs 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: cuva 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_val" 2 | test_split: validation 3 | metric_list: 4 | - metric: anls 5 | aggregation: mean 6 | higher_is_better: true 7 | include: _default_template_docvqa_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/funqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: fesvhtr/FunQA_LMMs 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: funqa 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml: -------------------------------------------------------------------------------- 1 | # Generated by _generate_configs.py 2 | dataset_name: gpqa_extended 3 | include: _gpqa_generative_n_shot_yaml 4 | task: gpqa_extended_generative_n_shot 5 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_image2text_hard 2 | task: 3 | - mix_evals_image2text_mc_hard 4 | - mix_evals_image2text_freeform_hard 5 | # - mix_evals_image2text_openended -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_hard.yaml: -------------------------------------------------------------------------------- 1 | group: mix_evals_video2text_hard 2 | task: 3 | - mix_evals_video2text_mc_hard 4 | - mix_evals_video2text_freeform_hard 5 | # - mix_evals_video2text_openended -------------------------------------------------------------------------------- /llava-ov-15/src/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # from .dpo_trainer import QwenDPOTrainer 2 | from .sft_trainer import QwenSFTTrainer 3 | # from .grpo_trainer import QwenGRPOTrainer 4 | 5 | __all__ = ["QwenSFTTrainer"] -------------------------------------------------------------------------------- /lmms-eval/miscs/example_eval.yaml: -------------------------------------------------------------------------------- 1 | - model: llava 2 | model_args: pretrained=liuhaotian/llava-v1.5-7b 3 | tasks: mmmu_val 4 | batch_size: 1 5 | log_samples: true 6 | log_samples_suffix: eval_mmmu 7 | output_path: "./logs/" 8 | 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_stem.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_stem 2 | group_alias: stem 3 | task: 4 | - mmlu_stem_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | 
version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_option.yaml: -------------------------------------------------------------------------------- 1 | group: mmupd_option 2 | task: 3 | - mmaad_option 4 | - mmiasd_option 5 | - mmivqd_option 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/jmmmu/jmmmu_diagnostics_and_laboratory_medicine.yaml: -------------------------------------------------------------------------------- 1 | dataset_name: Diagnostics_and_Laboratory_Medicine 2 | tag: "jmmmu_culture_agnostic" 3 | task: "jmmmu_diagnostics_and_laboratory_medicine" 4 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_other.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_other 2 | group_alias: other 3 | task: 4 | - mmlu_other_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_base.yaml: -------------------------------------------------------------------------------- 1 | group: mmupd_base 2 | task: 3 | - mmaad_base 4 | - mmiasd_base 5 | - mmivqd_base 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/worldqa/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/worldqa 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: multi-hop-reasoning 6 | metadata: 7 
| version: 0.0 8 | gpt_eval_model_name: "gpt-4-0613" -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/screen_shoter/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.screen_shoter.screen import ScreenImage 2 | from live_bench.screen_shoter.screen_shoter import ( 3 | ScreenShoter, 4 | get_shoter, 5 | register_shoter, 6 | ) 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_cn 2 | task: 3 | - mmbench_cn_dev 4 | - mmbench_cn_test 5 | - mmbench_cn_cc 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | sys_prompt: "有如下几个选项:" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_en.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_en 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | metadata: 6 | version: 0.0 7 | sys_prompt: "There are several options:" 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | -------------------------------------------------------------------------------- /lmms-eval/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools import setup 3 | 4 | # This is to make sure that the package supports editable installs 5 | if __name__ == "__main__": 6 | setuptools.setup( 7 | license_files=["LICENSE"], 8 | ) 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vdc/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: wchai/lmms_VDC_test 2 | dataset_kwargs: 3 | token: True 4 | video: 
True 5 | cache_dir: vdc_test 6 | 7 | metadata: 8 | version: 0.0 9 | gpt_eval_model_name: gpt-4o-mini 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/egoschema/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/egoschema 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: egoschema 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/infovqa/infovqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_val" 2 | test_split: validation 3 | output_type: generate_until 4 | metric_list: 5 | - metric: anls 6 | aggregation: mean 7 | higher_is_better: true 8 | include: _default_template_infovqa_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py: -------------------------------------------------------------------------------- 1 | class DummyParse: 2 | @staticmethod 3 | def parse(response: str, *args, **kwargs) -> str: 4 | """return the raw string without doing anything""" 5 | return response.strip() 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_humanities.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_humanities 2 | group_alias: humanities 3 | task: 4 | - mmlu_humanities_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmupd/mmupd_instruction.yaml: 
-------------------------------------------------------------------------------- 1 | group: mmupd_instruction 2 | task: 3 | - mmaad_instruction 4 | - mmiasd_instruction 5 | - mmivqd_instruction 6 | metadata: 7 | version: 0.0 8 | sys_prompt: "" 9 | gpt_eval_model_name: "gpt-3.5-turbo-0125" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: urdu 4 | token: True 5 | task: "llava_in_the_wild_urdu" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau_test_mini.yaml: -------------------------------------------------------------------------------- 1 | task: "mmau_test_mini" 2 | test_split: test_mini 3 | 4 | metric_list: 5 | - metric: accuracy 6 | aggregation: !function utils.mmau_aggregate_results 7 | higher_is_better: true 8 | 9 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: arabic 4 | token: True 5 | task: "llava_in_the_wild_arabic" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: french 4 | token: True 5 | task: 
"llava_in_the_wild_french" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: hindi 4 | token: True 5 | task: "llava_in_the_wild_hindi" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: spanish 4 | token: True 5 | task: "llava_in_the_wild_spanish" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/refcoco/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco 2 | task: 3 | - refcoco_seg_test 4 | - refcoco_seg_val 5 | - refcoco_seg_testA 6 | - refcoco_seg_testB 7 | - refcoco_bbox_test 8 | - refcoco_bbox_val 9 | - refcoco_bbox_testA 10 | - refcoco_bbox_testB 11 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmau/mmau_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmau_test" 2 | test_split: test 3 | 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.mmau_aggregate_results_for_submission 7 | higher_is_better: true 8 | 9 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: bengali 4 | token: True 5 | task: "llava_in_the_wild_bengali" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: chinese 4 | token: True 5 | task: "llava_in_the_wild_chinese" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: russian 4 | token: True 5 | task: "llava_in_the_wild_russian" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/textvqa/textvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function utils.textvqa_aggregate_submissions 6 | higher_is_better: true 7 | include: _default_template_textvqa_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_test_clean_long.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Librispeech-concat 2 | task : "librispeech_test_clean_long" 3 | test_split: test_clean 4 | process_results: !function 
utils.librispeech_long_process_result 5 | include: _default_yaml_template -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/librispeech/librispeech_test_other_long.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Librispeech-concat 2 | task : "librispeech_test_other_long" 3 | test_split: test_other 4 | process_results: !function utils.librispeech_long_process_result 5 | include: _default_yaml_template -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu 2 | task: 3 | - mmlu_stem 4 | - mmlu_other 5 | - mmlu_social_sciences 6 | - mmlu_humanities 7 | aggregate_metric_list: 8 | - metric: acc 9 | weight_by_size: True 10 | metadata: 11 | version: 2 12 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: "gagan3012/multilingual-llava-bench" 2 | dataset_kwargs: 3 | config: japanese 4 | token: True 5 | task: "llava_in_the_wild_japanese" 6 | include: _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/perceptiontest/test/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/PerceptionTest 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: perceptiontest 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/__init__.py: 
-------------------------------------------------------------------------------- 1 | from metrics.aggregation_type import AggregationType 2 | from metrics.metric_type import MetricType 3 | from metrics.response_parse_type import ResponseParseType 4 | 5 | __all__ = ["AggregationType", "MetricType", "ResponseParseType"] 6 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml: -------------------------------------------------------------------------------- 1 | group: mmlu_social_sciences 2 | group_alias: social sciences 3 | task: 4 | - mmlu_social_sciences_tasks 5 | aggregate_metric_list: 6 | - metric: acc 7 | weight_by_size: True 8 | metadata: 9 | version: 2 10 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/perceptiontest/val/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/PerceptionTest_Val 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: perceptiontest_val 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/llava_interleave_bench/_default_template_interleave_yaml: -------------------------------------------------------------------------------- 1 | output_type: generate_until 2 | generation_kwargs: 3 | until: 4 | - "ASSISTANT:" 5 | image_aspect_ratio: pad 6 | metadata: 7 | version: 0.0 8 | api_type : openai 9 | gpt_eval_model_name: "gpt-3.5-turbo" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathvista/mathvista_testmini.yaml: -------------------------------------------------------------------------------- 1 | group: mathvista_testmini 2 | task: 3 | - mathvista_testmini_cot 4 | - mathvista_testmini_solution 5 | - 
mathvista_testmini_format 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo" 9 | quick_extract: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml: -------------------------------------------------------------------------------- 1 | task: mmbench_cn_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function cn_utils.mmbench_aggregate_test_results 6 | higher_is_better: true 7 | include: _default_template_mmbench_cn_yaml 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_test" 2 | test_split: test 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function en_utils.mmbench_aggregate_test_results 7 | higher_is_better: true 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_count.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_count 3 | dataset_name: action_count 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_count 8 | post_prompt: "Only give the best option.\n" 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_count 3 | dataset_name: moving_count 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_count 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_state_change.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_state_change 3 | dataset_name: state_change 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: state_change 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_virology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import Dict 3 | 4 | 5 | class UnsupportedAggregation: 6 | @staticmethod 7 | def aggregate(scores: Dict[str, Number], weights: Dict[str, Number]) -> Number: 8 | return -1 9 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py: -------------------------------------------------------------------------------- 1 | class UnsupportedScoring: 2 | """Unsupported scoring.""" 3 | 4 | @staticmethod 5 | def match(response: str, correct_answer: str) -> int: 6 | """Default response for unimplemented metrics.""" 7 | return -1 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_astronomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_marketing" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_nutrition" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmt/_default_template_yaml: 
-------------------------------------------------------------------------------- 1 | lmms_eval_specific_kwargs: 2 | default: 3 | pre_prompt: "" 4 | post_prompt: "\nAnswer the question using a single character from the given options." 5 | generation_kwargs: 6 | max_new_tokens: 8 7 | metadata: 8 | version: 0.0 9 | task_type: image -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_antonym 3 | dataset_name: action_antonym 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_antonym 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_shuffle 3 | dataset_name: object_shuffle 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_shuffle 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vitatecs/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lscpku/VITATECS 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: vitatecs 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "\nPlease response with a single letter (A or B):" -------------------------------------------------------------------------------- /llava-ov-15/src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dpo_dataset import make_dpo_data_module 2 | from 
.sft_dataset import make_supervised_data_module 3 | from .grpo_dataset import make_grpo_data_module 4 | 5 | __all__ =[ 6 | "make_dpo_data_module", 7 | "make_supervised_data_module", 8 | "make_grpo_data_module" 9 | ] -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_management" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_sequence 3 | dataset_name: action_sequence 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_sequence 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_character_order.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_character_order 3 | dataset_name: character_order 4 | 
test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: character_order 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mix_evals/audio2text/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_kwargs: 2 | token: true 3 | dataset_path: lmms-lab/MixEval-X-audio2text 4 | lmms_eval_specific_kwargs: 5 | default: 6 | post_prompt: "" 7 | pre_prompt: "" 8 | metadata: 9 | gpt_eval_model_name: gpt-4o-mini 10 | version: 0 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_aging" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_philosophy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": 
"_continuation_template_yaml" 6 | "task": "mmlu_continuation_prehistory" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_sociology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_prediction 3 | dataset_name: action_prediction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_prediction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_fine_grained_pose 3 | dataset_name: fine_grained_pose 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: fine_grained_pose 8 | post_prompt: "Only 
give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_attribute 3 | dataset_name: moving_attribute 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_attribute 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_moving_direction 3 | dataset_name: moving_direction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: moving_direction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_existence 3 | dataset_name: object_existence 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_existence 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_scene_transition 3 | dataset_name: scene_transition 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: scene_transition 8 | post_prompt: "Only give the best option.\n" 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_unexpected_action 3 | dataset_name: unexpected_action 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: unexpected_action 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/_task_utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_submission_file(file_name, args, subpath="submissions"): 5 | path = os.path.join(args.output_path, subpath) 6 | os.makedirs(path, exist_ok=True) 7 | path = os.path.join(path, file_name) 8 | return os.path.abspath(path) 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_en" 2 | dataset_name: en 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|en|>" 10 | include : _default_template_yaml -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_fr" 2 | dataset_name: fr 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|fr|>" 10 | include : _default_template_yaml -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_global_facts" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_miscellaneous" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_object_interaction 3 | dataset_name: object_interaction 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: object_interaction 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_formal_logic" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_jurisprudence" 7 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_management" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_action_localization 3 | dataset_name: action_localization 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: action_localization 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_fine_grained_action 3 | dataset_name: fine_grained_action 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: fine_grained_action 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/open_asr/openasr.yaml: -------------------------------------------------------------------------------- 1 | group: openasr 2 | task: 3 | - open_asr_ami 4 | - open_asr_common_voice 5 | - open_asr_earnings22 6 | - open_asr_gigaspeech 7 | - open_asr_librispeech_test_clean 8 | - open_asr_librispeech_test_other 9 | - open_asr_spgispeech 10 | - open_asr_voxpopuli 11 | - open_asr_tedlium -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CMMMU 2 | output_type: generate_until 3 | doc_to_visual: !function utils.cmmmu_doc_to_visual 4 | doc_to_text: !function utils.cmmmu_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "business_ethics" 2 | "description": "The following are questions (with answers) about business\ 3 | \ ethics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_business_ethics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_biology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_physics" 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_econometrics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_disputes" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ disputes.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_disputes" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_prehistory.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory" 7 | "task_alias": 
"prehistory" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_anatomy_generative" 7 | "task_alias": "anatomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/.github/issue_template.md: -------------------------------------------------------------------------------- 1 | Before you open an issue, please check if a similar issue already exists or has been closed before. 2 | 3 | ### When you open an issue, please be sure to include the following 4 | 5 | - [ ] A descriptive title: [xxx] XXXX 6 | - [ ] A detailed description 7 | 8 | Thank you for your contributions! 
9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml: -------------------------------------------------------------------------------- 1 | task : "common_voice_15_zh-CN" 2 | dataset_name: zh-CN 3 | lmms_eval_specific_kwargs: 4 | default: 5 | pre_prompt: "" 6 | post_prompt: "" 7 | qwen2_audio: 8 | pre_prompt: "" 9 | post_prompt: " <|zh|>" 10 | include : _default_template_yaml 11 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "abstract_algebra" 2 | "description": "The following are questions (with answers) about abstract\ 3 | \ algebra.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_abstract_algebra" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_medicine" 2 | "description": "The following are questions (with answers) about college\ 3 | \ medicine.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_medicine" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "machine_learning" 2 | "description": "The following are questions (with answers) about machine\ 3 | \ learning.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": 
"mmlu_continuation_machine_learning" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "medical_genetics" 2 | "description": "The following are questions (with answers) about medical\ 3 | \ genetics.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_medical_genetics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "moral_scenarios" 2 | "description": "The following are questions (with answers) about moral\ 3 | \ scenarios.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_moral_scenarios" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "world_religions" 2 | "description": "The following are questions (with answers) about world\ 3 | \ religions.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_world_religions" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": 
"_default_template_yaml" 6 | "task": "mmlu_human_aging" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_virology_generative" 7 | "task_alias": "virology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_egocentric_navigation 3 | dataset_name: egocentric_navigation 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: egocentric_navigation 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/docvqa/docvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_test" 2 | test_split: test 3 | process_results: !function utils.docvqa_test_process_results 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.docvqa_test_aggregate_results 7 | higher_is_better: true 8 | include: _default_template_docvqa_yaml 9 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_chemistry" 2 | "description": "The following are questions (with answers) about college\ 3 | \ chemistry.\n\n" 4 | "tag": 
"mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_chemistry" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "computer_security" 2 | "description": "The following are questions (with answers) about computer\ 3 | \ security.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_computer_security" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "conceptual_physics" 2 | "description": "The following are questions (with answers) about conceptual\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_conceptual_physics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_sexuality" 2 | "description": "The following are questions (with answers) about human\ 3 | \ sexuality.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_human_sexuality" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "professional_law" 2 | "description": "The following are questions (with 
answers) about professional\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_professional_law" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_global_facts.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "global_facts" 2 | "description": "The following are multiple choice questions (with answers) about global\ 3 | \ facts.\n\n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_global_facts" 7 | "task_alias": "global_facts" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "miscellaneous" 2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\ 3 | \n" 4 | "tag": "mmlu_other_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_miscellaneous" 7 | "task_alias": "miscellaneous" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": 
"The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_stem_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_astronomy_generative" 7 | "task_alias": "astronomy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_marketing_generative" 7 | "task_alias": "marketing" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_nutrition_generative" 7 | "task_alias": "nutrition" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmbench/mmbench.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | - mmbench_cn_dev 6 | - mmbench_cn_test 7 | - mmbench_cn_cc 8 | - mmbench_ru_dev 9 | metadata: 10 | version: 0.0 11 | sys_prompt: "There are several options:" 12 | gpt_eval_model_name: "gpt-3.5-turbo-0613" -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml: -------------------------------------------------------------------------------- 1 | 
"dataset_name": "clinical_knowledge" 2 | "description": "The following are questions (with answers) about clinical\ 3 | \ knowledge.\n\n" 4 | "tag": "mmlu_continuation_other" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_clinical_knowledge" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_mathematics" 2 | "description": "The following are questions (with answers) about college\ 3 | \ mathematics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_college_mathematics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_biology" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school biology.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_biology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "high_school_physics" 2 | "description": "The following are questions (with answers) about high\ 3 | \ school physics.\n\n" 4 | "tag": "mmlu_continuation_stem" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_high_school_physics" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_international_law.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "international_law" 2 | "description": "The following are questions (with answers) about international\ 3 | \ law.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_international_law" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "logical_fallacies" 2 | "description": "The following are questions (with answers) about logical\ 3 | \ fallacies.\n\n" 4 | "tag": "mmlu_continuation_humanities" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_logical_fallacies" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "public_relations" 2 | "description": "The following are questions (with answers) about public\ 3 | \ relations.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_public_relations" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "security_studies" 2 | "description": "The following are questions (with answers) about security\ 3 | \ studies.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_security_studies" 7 | -------------------------------------------------------------------------------- 
/lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_econometrics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "econometrics" 2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_econometrics" 7 | "task_alias": "econometrics" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_formal_logic.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "formal_logic" 2 | "description": "The following are multiple choice questions (with answers) about formal\ 3 | \ logic.\n\n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_formal_logic" 7 | "task_alias": "formal_logic" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "jurisprudence" 2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_jurisprudence" 7 | "task_alias": "jurisprudence" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "astronomy" 2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_stem" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_astronomy" 7 | 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "marketing" 2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_marketing" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "nutrition" 2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_nutrition" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "virology" 2 | "description": "The following are multiple choice questions (with answers) about virology.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_virology" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 
6 | "task": "mmlu_management_generative" 7 | "task_alias": "management" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: mvbench_counterfactual_inference 3 | dataset_name: counterfactual_inference 4 | test_split: train 5 | lmms_eval_specific_kwargs: 6 | default: 7 | sub_task: counterfactual_inference 8 | post_prompt: "Only give the best option.\n" -------------------------------------------------------------------------------- /lmms-eval/tools/live_bench/live_bench/data_generator/__init__.py: -------------------------------------------------------------------------------- 1 | from live_bench.data_generator.live_bench import LiveBench 2 | from live_bench.data_generator.live_bench_data import LiveBenchData 3 | from live_bench.data_generator.qa_generator import get_generator, get_random_generator 4 | from live_bench.data_generator.response import Response 5 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/cvrr/_default_template_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CVRR-ES 2 | dataset_kwargs: 3 | token: True 4 | video: True 5 | cache_dir: cvrr-es 6 | lmms_eval_specific_kwargs: 7 | default: 8 | pre_prompt: "" 9 | post_prompt: "" 10 | 11 | metadata: 12 | version: 0.0 13 | gpt_eval_model_name: gpt-3.5-turbo-0125 -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_text.yaml: -------------------------------------------------------------------------------- 1 | group: mathverse_testmini_text 2 | task: 3 | - mathverse_testmini_text_lite 4 | - mathverse_testmini_text_dominant 5 | - mathverse_testmini_text_only 6 | metadata: 7 
| version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo" 9 | trunk_response: 30 10 | quick_match: false -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "us_foreign_policy" 2 | "description": "The following are questions (with answers) about us\ 3 | \ foreign policy.\n\n" 4 | "tag": "mmlu_continuation_social_sciences" 5 | "include": "_continuation_template_yaml" 6 | "task": "mmlu_continuation_us_foreign_policy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_college_biology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_biology" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ biology.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_biology" 7 | "task_alias": "college_biology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/default/mmlu_college_physics.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "college_physics" 2 | "description": "The following are multiple choice questions (with answers) about college\ 3 | \ physics.\n\n" 4 | "tag": "mmlu_stem_tasks" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_college_physics" 7 | "task_alias": "college_physics" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "management" 2 | "description": "The following are multiple choice 
questions (with answers) about management.\n\ 3 | \n" 4 | "tag": "mmlu_flan_cot_zeroshot_other" 5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml" 6 | "task": "mmlu_flan_cot_zeroshot_management" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "anatomy" 2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\ 3 | \n" 4 | "tag": "mmlu_flan_n_shot_generative_stem" 5 | "include": "_mmlu_flan_generative_template_yaml" 6 | "task": "mmlu_flan_n_shot_generative_anatomy" 7 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_human_aging.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "human_aging" 2 | "description": "The following are multiple choice questions (with answers) about human\ 3 | \ aging.\n\n" 4 | "tag": "mmlu_other_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_human_aging_generative" 7 | "task_alias": "human_aging" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_philosophy.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "philosophy" 2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_philosophy_generative" 7 | "task_alias": "philosophy" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_prehistory.yaml: 
-------------------------------------------------------------------------------- 1 | "dataset_name": "prehistory" 2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\ 3 | \n" 4 | "tag": "mmlu_humanities_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_prehistory_generative" 7 | "task_alias": "prehistory" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/mmlu/generative/mmlu_sociology.yaml: -------------------------------------------------------------------------------- 1 | "dataset_name": "sociology" 2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\ 3 | \n" 4 | "tag": "mmlu_social_sciences_generative" 5 | "include": "_default_template_yaml" 6 | "task": "mmlu_sociology_generative" 7 | "task_alias": "sociology" 8 | -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench_direct_attributes.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: "vstar_bench_direct_attributes" 3 | dataset_kwargs: 4 | category: "direct_attributes" 5 | metric_list: 6 | - metric: vstar_direct_attributes_acc 7 | aggregation: !function utils.vstar_aggregate_results 8 | higher_is_better: true -------------------------------------------------------------------------------- /lmms-eval/lmms_eval/tasks/vstar_bench/vstar_bench_relative_position.yaml: -------------------------------------------------------------------------------- 1 | include: _default_template_yaml 2 | task: "vstar_bench_relative_position" 3 | dataset_kwargs: 4 | category: "relative_position" 5 | metric_list: 6 | - metric: vstar_relative_position_acc 7 | aggregation: !function utils.vstar_aggregate_results 8 | higher_is_better: true 
-------------------------------------------------------------------------------- /lmms-eval/lmms_eval/models/video_chatgpt/constants.py: -------------------------------------------------------------------------------- 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 2 | WORKER_HEART_BEAT_INTERVAL = 15 3 | 4 | LOGDIR = "." 5 | 6 | 7 | # Defining model 8 | DEFAULT_VIDEO_TOKEN = "