├── .github
    ├── issue_template.md
    ├── pull_request_template.md
    └── workflows
    │   └── lint.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
    ├── README.md
    ├── commands.md
    ├── current_tasks.md
    ├── lmms-eval-0.3.md
    ├── model_guide.md
    ├── run_examples.md
    └── task_guide.md
├── examples
    └── models
    │   ├── aero_1_audio.sh
    │   ├── aria.sh
    │   ├── auroracap.sh
    │   ├── claude.sh
    │   ├── idefics2.sh
    │   ├── instructblip.sh
    │   ├── internvl1.5.sh
    │   ├── internvl2.sh
    │   ├── llama_vid.sh
    │   ├── llama_vision.sh
    │   ├── llava_1_5.sh
    │   ├── llava_next.sh
    │   ├── llava_onevision.sh
    │   ├── llava_video.sh
    │   ├── movie_chat.sh
    │   ├── mplug_owl.sh
    │   ├── openai_compatible.sh
    │   ├── plm.sh
    │   ├── qwen25vl.sh
    │   ├── qwen2vl.sh
    │   ├── sglang.sh
    │   ├── slime.sh
    │   ├── tensor_parallel.sh
    │   ├── tinyllava.sh
    │   ├── video_chatgpt.sh
    │   ├── video_llava.sh
    │   ├── vllm_qwen2vl.sh
    │   ├── xai_grok.sh
    │   └── xcomposer.sh
├── lmms_eval
    ├── __init__.py
    ├── __main__.py
    ├── api
    │   ├── __init__.py
    │   ├── filter.py
    │   ├── group.py
    │   ├── instance.py
    │   ├── metrics.py
    │   ├── model.py
    │   ├── registry.py
    │   ├── samplers.py
    │   └── task.py
    ├── caching
    │   ├── __init__.py
    │   └── cache.py
    ├── evaluator.py
    ├── evaluator_utils.py
    ├── filters
    │   ├── __init__.py
    │   ├── decontamination.py
    │   ├── extraction.py
    │   ├── selection.py
    │   └── transformation.py
    ├── loggers
    │   ├── __init__.py
    │   ├── evaluation_tracker.py
    │   ├── utils.py
    │   └── wandb_logger.py
    ├── logging_utils.py
    ├── models
    │   ├── __init__.py
    │   ├── aero.py
    │   ├── aria.py
    │   ├── auroracap.py
    │   ├── batch_gpt4.py
    │   ├── cambrian.py
    │   ├── claude.py
    │   ├── cogvlm2.py
    │   ├── egogpt.py
    │   ├── from_log.py
    │   ├── fuyu.py
    │   ├── gemini_api.py
    │   ├── gpt4v.py
    │   ├── idefics2.py
    │   ├── instructblip.py
    │   ├── internvideo2.py
    │   ├── internvideo2_5.py
    │   ├── internvl.py
    │   ├── internvl2.py
    │   ├── llama_vid.py
    │   ├── llama_vision.py
    │   ├── llava.py
    │   ├── llava_hf.py
    │   ├── llava_onevision.py
    │   ├── llava_onevision_moviechat.py
    │   ├── llava_sglang.py
    │   ├── llava_vid.py
    │   ├── longva.py
    │   ├── mantis.py
    │   ├── minicpm_v.py
    │   ├── minimonkey.py
    │   ├── model_utils
    │   │   ├── __init__.py
    │   │   ├── audio_processing.py
    │   │   ├── load_video.py
    │   │   └── qwen
    │   │   │   └── qwen_generate_utils.py
    │   ├── moviechat.py
    │   ├── mplug_owl_video.py
    │   ├── mplug_owl_video
    │   │   ├── __init__.py
    │   │   ├── configuration_mplug_owl.py
    │   │   ├── modeling_mplug_owl.py
    │   │   ├── processing_mplug_owl.py
    │   │   └── tokenization_mplug_owl.py
    │   ├── ola.py
    │   ├── openai_compatible.py
    │   ├── oryx.py
    │   ├── phi3v.py
    │   ├── phi4_multimodal.py
    │   ├── plm.py
    │   ├── qwen2_5_omni.py
    │   ├── qwen2_5_vl.py
    │   ├── qwen2_5_vl_interleave.py
    │   ├── qwen2_audio.py
    │   ├── qwen2_vl.py
    │   ├── qwen_vl.py
    │   ├── qwen_vl_api.py
    │   ├── reka.py
    │   ├── ross.py
    │   ├── slime.py
    │   ├── srt_api.py
    │   ├── tinyllava.py
    │   ├── video_chatgpt.py
    │   ├── video_chatgpt
    │   │   ├── __init__.py
    │   │   ├── constants.py
    │   │   ├── eval
    │   │   │   ├── __init__.py
    │   │   │   └── model_utils.py
    │   │   ├── inference.py
    │   │   ├── model
    │   │   │   ├── __init__.py
    │   │   │   ├── consolidate.py
    │   │   │   ├── make_delta.py
    │   │   │   ├── utils.py
    │   │   │   └── video_chatgpt.py
    │   │   ├── single_video_inference.py
    │   │   ├── utils.py
    │   │   └── video_conversation.py
    │   ├── video_llava.py
    │   ├── videochat2.py
    │   ├── videochat_flash.py
    │   ├── videollama3.py
    │   ├── vila.py
    │   ├── vita.py
    │   ├── vllm.py
    │   ├── vora.py
    │   ├── whisper.py
    │   ├── whisper_vllm.py
    │   ├── xcomposer2_4KHD.py
    │   └── xcomposer2d5.py
    ├── tasks
    │   ├── VisualPuzzles
    │   │   ├── VisualPuzzles_cot.yaml
    │   │   ├── VisualPuzzles_direct.yaml
    │   │   └── utils.py
    │   ├── __init__.py
    │   ├── _task_utils
    │   │   ├── file_utils.py
    │   │   ├── gpt_eval_utils.py
    │   │   ├── video_loader.py
    │   │   └── vqa_eval_metric.py
    │   ├── activitynetqa
    │   │   ├── _default_template_yaml
    │   │   ├── activitynetqa_generation.yaml
    │   │   └── utils.py
    │   ├── ai2d
    │   │   ├── ai2d.yaml
    │   │   ├── ai2d_lite.yaml
    │   │   ├── ai2d_no_mask.yaml
    │   │   ├── upload_ai2d.py
    │   │   └── utils.py
    │   ├── air_bench
    │   │   ├── _default_template_yaml
    │   │   ├── air_bench_chat.yaml
    │   │   ├── air_bench_chat_mixed.yaml
    │   │   ├── air_bench_chat_music.yaml
    │   │   ├── air_bench_chat_sound.yaml
    │   │   ├── air_bench_chat_speech.yaml
    │   │   ├── air_bench_foundation.yaml
    │   │   ├── air_bench_foundation_music.yaml
    │   │   ├── air_bench_foundation_sound.yaml
    │   │   ├── air_bench_foundation_speech.yaml
    │   │   └── utils.py
    │   ├── alpaca_audio
    │   │   ├── alpaca_audio.yaml
    │   │   └── utils.py
    │   ├── arc
    │   │   ├── README.md
    │   │   ├── arc_challenge.yaml
    │   │   └── arc_easy.yaml
    │   ├── av_odyssey
    │   │   ├── av_odyssey.yaml
    │   │   └── utils.py
    │   ├── capability
    │   │   ├── _default_template_yaml
    │   │   ├── capability.yaml
    │   │   ├── capability_OCR.yaml
    │   │   ├── capability_action.yaml
    │   │   ├── capability_camera_angle.yaml
    │   │   ├── capability_camera_movement.yaml
    │   │   ├── capability_character_identification.yaml
    │   │   ├── capability_dynamic_object_number.yaml
    │   │   ├── capability_event.yaml
    │   │   ├── capability_object_category.yaml
    │   │   ├── capability_object_color.yaml
    │   │   ├── capability_object_number.yaml
    │   │   ├── capability_scene.yaml
    │   │   ├── capability_spatial_relation.yaml
    │   │   ├── capability_style.yaml
    │   │   ├── prompt.py
    │   │   └── utils.py
    │   ├── charades_sta
    │   │   ├── charades.yaml
    │   │   ├── eval_tvg.py
    │   │   └── utils.py
    │   ├── chartqa
    │   │   ├── chartqa.yaml
    │   │   ├── chartqa_lite.yaml
    │   │   ├── upload_chartqa.py
    │   │   └── utils.py
    │   ├── cinepile
    │   │   ├── cinepile.yaml
    │   │   └── utils.py
    │   ├── clotho_aqa
    │   │   ├── _default_template_yaml
    │   │   ├── clotho_aqa.yaml
    │   │   ├── clotho_aqa_test.yaml
    │   │   ├── clotho_aqa_val.yaml
    │   │   ├── clotho_asqa_test_v2.yaml
    │   │   └── utils.py
    │   ├── cmmmu
    │   │   ├── _cmmmu.yaml
    │   │   ├── _default_template_cmmmu_yaml
    │   │   ├── cmmmu_test.yaml
    │   │   ├── cmmmu_val.yaml
    │   │   └── utils.py
    │   ├── coco_cap
    │   │   ├── coco2014_cap.yaml
    │   │   ├── coco2014_cap_test.yaml
    │   │   ├── coco2014_cap_val.yaml
    │   │   ├── coco2017_cap.yaml
    │   │   ├── coco2017_cap_test.yaml
    │   │   ├── coco2017_cap_val.yaml
    │   │   ├── coco2017_cap_val_lite.yaml
    │   │   ├── coco_cap.yaml
    │   │   ├── coco_karpathy.yaml
    │   │   ├── coco_karpathy_test.yaml
    │   │   ├── coco_karpathy_val.yaml
    │   │   └── utils.py
    │   ├── common_voice_15
    │   │   ├── _default_template_yaml
    │   │   ├── common_voice_15.yaml
    │   │   ├── common_voice_15_en.yaml
    │   │   ├── common_voice_15_fr.yaml
    │   │   ├── common_voice_15_zh-CN.yaml
    │   │   └── utils.py
    │   ├── conbench
    │   │   ├── conbench.yaml
    │   │   └── utils.py
    │   ├── covost2
    │   │   ├── _default_template_en_zh_yaml
    │   │   ├── _default_template_zh_en_yaml
    │   │   ├── covost2.yaml
    │   │   ├── covost2_en_zh.yaml
    │   │   ├── covost2_en_zh_dev.yaml
    │   │   ├── covost2_en_zh_test.yaml
    │   │   ├── covost2_zh_en.yaml
    │   │   ├── covost2_zh_en_dev.yaml
    │   │   ├── covost2_zh_en_test.yaml
    │   │   └── utils.py
    │   ├── cuva
    │   │   ├── _default_template_yaml
    │   │   ├── cuva.yaml
    │   │   ├── cuva_test.yaml
    │   │   └── utils.py
    │   ├── cvrr
    │   │   ├── _cvrr.yaml
    │   │   ├── _default_template_yaml
    │   │   ├── cvrr_fine_grained_action_understanding.yaml
    │   │   ├── cvrr_interpretation_of_social_context.yaml
    │   │   ├── cvrr_interpretation_of_visual_context.yaml
    │   │   ├── cvrr_multiple_actions_in_a_single_video.yaml
    │   │   ├── cvrr_non_existent_actions_with_existent_scene_depictions.yaml
    │   │   ├── cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml
    │   │   ├── cvrr_object_instance_count.yaml
    │   │   ├── cvrr_partial_actions.yaml
    │   │   ├── cvrr_time_order_understanding.yaml
    │   │   ├── cvrr_understanding_emotional_context.yaml
    │   │   ├── cvrr_unusual_and_physically_anomalous_activities.yaml
    │   │   └── utils.py
    │   ├── detailcaps
    │   │   ├── _default_template_detailcaps_yaml
    │   │   ├── detailcaps.yaml
    │   │   └── utils.py
    │   ├── docvqa
    │   │   ├── _default_template_docvqa_yaml
    │   │   ├── docvqa.yaml
    │   │   ├── docvqa_test.yaml
    │   │   ├── docvqa_val.yaml
    │   │   ├── docvqa_val_lite.yaml
    │   │   └── utils.py
    │   ├── dtcbench
    │   │   ├── dtcbench.yaml
    │   │   └── utils.py
    │   ├── egoplan
    │   │   ├── egoplan.yaml
    │   │   └── utils.py
    │   ├── egoschema
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── egoschema.yaml
    │   │   ├── egoschema_mcppl.yaml
    │   │   ├── egoschema_subset.yaml
    │   │   ├── egoschema_subset_mcppl.yaml
    │   │   └── utils.py
    │   ├── egothink
    │   │   ├── _default_template_yaml
    │   │   ├── egothink.yaml
    │   │   ├── egothink_activity.yaml
    │   │   ├── egothink_affordance.yaml
    │   │   ├── egothink_assistance.yaml
    │   │   ├── egothink_attribute.yaml
    │   │   ├── egothink_comparing.yaml
    │   │   ├── egothink_counting.yaml
    │   │   ├── egothink_existence.yaml
    │   │   ├── egothink_forecasting.yaml
    │   │   ├── egothink_location.yaml
    │   │   ├── egothink_navigation.yaml
    │   │   ├── egothink_situated.yaml
    │   │   ├── egothink_spatial.yaml
    │   │   └── utils.py
    │   ├── ferret
    │   │   ├── ferret.yaml
    │   │   ├── rule.json
    │   │   └── utils.py
    │   ├── fleurs
    │   │   ├── _default_template_yaml
    │   │   ├── fleurs.yaml
    │   │   ├── fleurs_cmn_hans_cn.yaml
    │   │   ├── fleurs_en.yaml
    │   │   ├── fleurs_yue_hant_hk.yaml
    │   │   └── utils.py
    │   ├── flickr30k
    │   │   ├── flickr30k.yaml
    │   │   ├── flickr30k_test.yaml
    │   │   ├── flickr30k_test_lite.yaml
    │   │   └── utils.py
    │   ├── funqa
    │   │   ├── _default_template_yaml
    │   │   ├── funqa.yaml
    │   │   ├── funqa_test.yaml
    │   │   └── utils.py
    │   ├── gigaspeech
    │   │   ├── gigaspeech.yaml
    │   │   ├── gigaspeech_dev.yaml
    │   │   ├── gigaspeech_l_dev.yaml
    │   │   ├── gigaspeech_l_test.yaml
    │   │   ├── gigaspeech_m_dev.yaml
    │   │   ├── gigaspeech_m_test.yaml
    │   │   ├── gigaspeech_s_dev.yaml
    │   │   ├── gigaspeech_s_test.yaml
    │   │   ├── gigaspeech_test.yaml
    │   │   ├── gigaspeech_xl_dev.yaml
    │   │   ├── gigaspeech_xl_test.yaml
    │   │   ├── gigaspeech_xs_dev.yaml
    │   │   ├── gigaspeech_xs_test.yaml
    │   │   ├── utils.py
    │   │   └── whisper_normalizer
    │   │   │   ├── basic.py
    │   │   │   ├── english.json
    │   │   │   └── english.py
    │   ├── gpqa
    │   │   ├── README.md
    │   │   ├── cot_n_shot
    │   │   │   ├── _generate_configs.py
    │   │   │   ├── _gpqa_cot_n_shot_yaml
    │   │   │   ├── gpqa_diamond_cot_n_shot.yaml
    │   │   │   ├── gpqa_extended_cot_n_shot.yaml
    │   │   │   ├── gpqa_main_cot_n_shot.yaml
    │   │   │   └── utils.py
    │   │   ├── cot_zeroshot
    │   │   │   ├── _generate_configs.py
    │   │   │   ├── _gpqa_cot_zeroshot_yaml
    │   │   │   ├── gpqa_diamond_cot_zeroshot.yaml
    │   │   │   ├── gpqa_extended_cot_zeroshot.yaml
    │   │   │   ├── gpqa_main_cot_zeroshot.yaml
    │   │   │   └── utils.py
    │   │   ├── generative
    │   │   │   ├── _generate_configs.py
    │   │   │   ├── _gpqa_generative_n_shot_yaml
    │   │   │   ├── gpqa_diamond_generative_n_shot.yaml
    │   │   │   ├── gpqa_extended_generative_n_shot.yaml
    │   │   │   ├── gpqa_main_generative_n_shot.yaml
    │   │   │   └── utils.py
    │   │   ├── n_shot
    │   │   │   ├── _generate_configs.py
    │   │   │   ├── _gpqa_n_shot_yaml
    │   │   │   ├── gpqa_diamond_n_shot.yaml
    │   │   │   ├── gpqa_extended_n_shot.yaml
    │   │   │   ├── gpqa_main_n_shot.yaml
    │   │   │   └── utils.py
    │   │   └── zeroshot
    │   │   │   ├── _generate_configs.py
    │   │   │   ├── _gpqa_zeroshot_yaml
    │   │   │   ├── gpqa_diamond_zeroshot.yaml
    │   │   │   ├── gpqa_extended_zeroshot.yaml
    │   │   │   ├── gpqa_main_zeroshot.yaml
    │   │   │   └── utils.py
    │   ├── gqa
    │   │   ├── gqa.yaml
    │   │   ├── gqa_lite.yaml
    │   │   └── utils.py
    │   ├── gqa_ru
    │   │   ├── gqa_ru.yaml
    │   │   └── utils.py
    │   ├── gsm8k
    │   │   ├── README.md
    │   │   ├── gsm8k-cot-llama.yaml
    │   │   ├── gsm8k-cot-self-consistency.yaml
    │   │   ├── gsm8k-cot-zeroshot.yaml
    │   │   ├── gsm8k-cot.yaml
    │   │   └── gsm8k.yaml
    │   ├── hallusion_bench
    │   │   ├── evaluate_hb.py
    │   │   ├── hallusion_bench_image.yaml
    │   │   └── utils.py
    │   ├── hellaswag
    │   │   ├── README.md
    │   │   ├── hellaswag.yaml
    │   │   └── utils.py
    │   ├── hrbench
    │   │   ├── hrbench.yaml
    │   │   ├── hrbench4k.yaml
    │   │   ├── hrbench8k.yaml
    │   │   ├── hrbench_evals.py
    │   │   └── utils.py
    │   ├── iconqa
    │   │   ├── _default_template_docvqa_yaml
    │   │   ├── iconqa.yaml
    │   │   ├── iconqa_test.yaml
    │   │   ├── iconqa_val.yaml
    │   │   └── utils.py
    │   ├── ifeval
    │   │   ├── README.md
    │   │   ├── ifeval.yaml
    │   │   ├── instructions.py
    │   │   ├── instructions_registry.py
    │   │   ├── instructions_util.py
    │   │   └── utils.py
    │   ├── ii_bench
    │   │   ├── ii_bench.yaml
    │   │   └── utils.py
    │   ├── illusionvqa
    │   │   ├── illusionvqa.yaml
    │   │   ├── illusionvqa_comprehension.yaml
    │   │   ├── illusionvqa_soft_localization.yaml
    │   │   └── utils.py
    │   ├── infovqa
    │   │   ├── _default_template_infovqa_yaml
    │   │   ├── infovqa.yaml
    │   │   ├── infovqa_test.yaml
    │   │   ├── infovqa_val.yaml
    │   │   ├── infovqa_val_lite.yaml
    │   │   └── utils.py
    │   ├── internal_eval
    │   │   ├── _default_template_internal_eval_yaml
    │   │   ├── d170_cn.yaml
    │   │   ├── d170_cn_utils.py
    │   │   ├── d170_en.yaml
    │   │   ├── d170_en_utils.py
    │   │   ├── dc100_en.yaml
    │   │   ├── dc100_en_utils.py
    │   │   ├── dc200_cn.yaml
    │   │   ├── dc200_cn_utils.py
    │   │   ├── internal_eval.yaml
    │   │   └── utils.py
    │   ├── jmmmu
    │   │   ├── _default_template_yaml
    │   │   ├── jmmmu.yaml
    │   │   ├── jmmmu_accounting.yaml
    │   │   ├── jmmmu_agriculture.yaml
    │   │   ├── jmmmu_architecture_and_engineering.yaml
    │   │   ├── jmmmu_basic_medical_science.yaml
    │   │   ├── jmmmu_biology.yaml
    │   │   ├── jmmmu_chemistry.yaml
    │   │   ├── jmmmu_clinical_medicine.yaml
    │   │   ├── jmmmu_computer_science.yaml
    │   │   ├── jmmmu_design.yaml
    │   │   ├── jmmmu_diagnostics_and_laboratory_medicine.yaml
    │   │   ├── jmmmu_economics.yaml
    │   │   ├── jmmmu_electronics.yaml
    │   │   ├── jmmmu_energy_and_power.yaml
    │   │   ├── jmmmu_finance.yaml
    │   │   ├── jmmmu_japanese_art.yaml
    │   │   ├── jmmmu_japanese_heritage.yaml
    │   │   ├── jmmmu_japanese_history.yaml
    │   │   ├── jmmmu_manage.yaml
    │   │   ├── jmmmu_marketing.yaml
    │   │   ├── jmmmu_materials.yaml
    │   │   ├── jmmmu_math.yaml
    │   │   ├── jmmmu_mechanical_engineering.yaml
    │   │   ├── jmmmu_music.yaml
    │   │   ├── jmmmu_pharmacy.yaml
    │   │   ├── jmmmu_physics.yaml
    │   │   ├── jmmmu_psychology.yaml
    │   │   ├── jmmmu_public_health.yaml
    │   │   ├── jmmmu_world_history.yaml
    │   │   └── utils.py
    │   ├── k12
    │   │   ├── k12.yaml
    │   │   └── utils.py
    │   ├── librispeech
    │   │   ├── _default_yaml_template
    │   │   ├── cn_tn.py
    │   │   ├── librispeech.yaml
    │   │   ├── librispeech_dev_clean.yaml
    │   │   ├── librispeech_dev_other.yaml
    │   │   ├── librispeech_long.yaml
    │   │   ├── librispeech_test_clean.yaml
    │   │   ├── librispeech_test_clean_long.yaml
    │   │   ├── librispeech_test_other.yaml
    │   │   ├── librispeech_test_other_long.yaml
    │   │   ├── utils.py
    │   │   └── whisper_normalizer
    │   │   │   ├── basic.py
    │   │   │   ├── english.json
    │   │   │   └── english.py
    │   ├── live_bench
    │   │   ├── live_bench.yaml
    │   │   ├── live_bench_2406.yaml
    │   │   ├── live_bench_2407.yaml
    │   │   ├── live_bench_2409.yaml
    │   │   ├── live_bench_template_yaml
    │   │   ├── live_bench_template_yaml_v2
    │   │   ├── utils.py
    │   │   └── utils_v2.py
    │   ├── livexiv_tqa
    │   │   ├── livexiv_tqa.yaml
    │   │   ├── livexiv_tqa_template_yaml
    │   │   ├── livexiv_tqa_v1.yaml
    │   │   ├── livexiv_tqa_v2.yaml
    │   │   ├── livexiv_tqa_v3.yaml
    │   │   ├── livexiv_tqa_v4.yaml
    │   │   ├── livexiv_tqa_v5.yaml
    │   │   ├── livexiv_tqa_v6.yaml
    │   │   └── utils.py
    │   ├── livexiv_vqa
    │   │   ├── livexiv_vqa.yaml
    │   │   ├── livexiv_vqa_template_yaml
    │   │   ├── livexiv_vqa_v1.yaml
    │   │   ├── livexiv_vqa_v2.yaml
    │   │   ├── livexiv_vqa_v3.yaml
    │   │   ├── livexiv_vqa_v4.yaml
    │   │   ├── livexiv_vqa_v5.yaml
    │   │   ├── livexiv_vqa_v6.yaml
    │   │   └── utils.py
    │   ├── llava-bench-coco
    │   │   ├── llava-bench-coco.yaml
    │   │   ├── rule.json
    │   │   └── utils.py
    │   ├── llava-in-the-wild
    │   │   ├── llava-in-the-wild.yaml
    │   │   ├── llava-in-the-wild_ko.yaml
    │   │   ├── rule.json
    │   │   ├── rule_ko.json
    │   │   ├── utils.py
    │   │   └── utils_ko.py
    │   ├── llava_interleave_bench
    │   │   ├── _default_template_interleave_yaml
    │   │   ├── in_domain.yaml
    │   │   ├── interleave_bench.yaml
    │   │   ├── multi_view_in_domain.yaml
    │   │   ├── out_of_domain.yaml
    │   │   └── utils.py
    │   ├── llava_wilder
    │   │   ├── _default_template_wilder_yaml
    │   │   ├── llava_wilder_small.yaml
    │   │   └── utils.py
    │   ├── longvideobench
    │   │   ├── longvideobench_test_i.yaml
    │   │   ├── longvideobench_test_v.yaml
    │   │   ├── longvideobench_val_i.yaml
    │   │   ├── longvideobench_val_v.yaml
    │   │   └── utils.py
    │   ├── mathverse
    │   │   ├── mathverse.yaml
    │   │   ├── mathverse_evals.py
    │   │   ├── mathverse_testmini.yaml
    │   │   ├── mathverse_testmini_text.yaml
    │   │   ├── mathverse_testmini_text_dominant.yaml
    │   │   ├── mathverse_testmini_text_lite.yaml
    │   │   ├── mathverse_testmini_text_only.yaml
    │   │   ├── mathverse_testmini_vision.yaml
    │   │   ├── mathverse_testmini_vision_dominant.yaml
    │   │   ├── mathverse_testmini_vision_intensive.yaml
    │   │   ├── mathverse_testmini_vision_only.yaml
    │   │   └── utils.py
    │   ├── mathvision
    │   │   ├── eval_utils.py
    │   │   ├── mathvision_reason_test.yaml
    │   │   ├── mathvision_reason_testmini.yaml
    │   │   ├── mathvision_test.yaml
    │   │   ├── mathvision_testmini.yaml
    │   │   └── utils.py
    │   ├── mathvista
    │   │   ├── mathvista.yaml
    │   │   ├── mathvista_evals.py
    │   │   ├── mathvista_test.yaml
    │   │   ├── mathvista_testmini.yaml
    │   │   ├── mathvista_testmini_cot.yaml
    │   │   ├── mathvista_testmini_format.yaml
    │   │   ├── mathvista_testmini_solution.yaml
    │   │   └── utils.py
    │   ├── megabench
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── breakdown
    │   │   │   ├── all_task_meta.json
    │   │   │   ├── analysis_utils.py
    │   │   │   └── derive_breakdown_results.py
    │   │   ├── evaluator.py
    │   │   ├── image_video_utils.py
    │   │   ├── megabench.yaml
    │   │   ├── megabench_core.yaml
    │   │   ├── megabench_core_si.yaml
    │   │   ├── megabench_open.yaml
    │   │   ├── megabench_open_si.yaml
    │   │   ├── metrics
    │   │   │   ├── README.md
    │   │   │   ├── __init__.py
    │   │   │   ├── aggregation
    │   │   │   │   ├── mean_agg.py
    │   │   │   │   ├── min_agg.py
    │   │   │   │   └── unsupported_agg.py
    │   │   │   ├── aggregation_type.py
    │   │   │   ├── metric_type.py
    │   │   │   ├── parsing
    │   │   │   │   ├── answer_str_parse.py
    │   │   │   │   ├── common
    │   │   │   │   │   ├── parsers.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── dummy_parse.py
    │   │   │   │   └── json_parse.py
    │   │   │   ├── response_parse_type.py
    │   │   │   └── scoring
    │   │   │   │   ├── ascii_art_vlm_judge.py
    │   │   │   │   ├── chess_jaccard.py
    │   │   │   │   ├── common
    │   │   │   │       ├── conversions.py
    │   │   │   │       ├── metrics.py
    │   │   │   │       └── transformations.py
    │   │   │   │   ├── constrained_generation.py
    │   │   │   │   ├── coordinate_sequence_match.py
    │   │   │   │   ├── dict_equality.py
    │   │   │   │   ├── dict_exact_match_agg_recall.py
    │   │   │   │   ├── dict_jaccard_agg_jaccard.py
    │   │   │   │   ├── dict_nbbox_iou_tuple_agg_jaccard.py
    │   │   │   │   ├── dict_set_equality_agg_jaccard.py
    │   │   │   │   ├── exact_str_match.py
    │   │   │   │   ├── exact_str_match_case_insensitive.py
    │   │   │   │   ├── general_numerical_match.py
    │   │   │   │   ├── geo_proximity.py
    │   │   │   │   ├── gleu.py
    │   │   │   │   ├── jaccard.py
    │   │   │   │   ├── latex_expr_equality.py
    │   │   │   │   ├── longest_common_list_prefix_ratio.py
    │   │   │   │   ├── mse.py
    │   │   │   │   ├── multi_ref_phrase.py
    │   │   │   │   ├── nbbox_iou.py
    │   │   │   │   ├── near_str_match.py
    │   │   │   │   ├── nli_entailment.py
    │   │   │   │   ├── normalized_similarity_damerau_levenshtein.py
    │   │   │   │   ├── number_rel_diff_ratio.py
    │   │   │   │   ├── positive_int_match.py
    │   │   │   │   ├── program_judge.py
    │   │   │   │   ├── sacrebleu_bleu.py
    │   │   │   │   ├── sequence_equality.py
    │   │   │   │   ├── set_equality.py
    │   │   │   │   ├── set_precision.py
    │   │   │   │   ├── simple_str_match.py
    │   │   │   │   ├── symbolic_planning.py
    │   │   │   │   ├── unsupported_scoring.py
    │   │   │   │   ├── vlm_as_judge.py
    │   │   │   │   ├── xml_nbbox_iou.py
    │   │   │   │   ├── xml_norm_point_distance.py
    │   │   │   │   └── xml_norm_point_in_bbox.py
    │   │   ├── requirements.txt
    │   │   └── utils.py
    │   ├── mia_bench
    │   │   ├── mia_bench.yaml
    │   │   └── utils.py
    │   ├── mirb
    │   │   ├── mirb.yaml
    │   │   └── utils.py
    │   ├── mix_evals
    │   │   ├── README.md
    │   │   ├── audio2text
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── mix_evals_audio2_text_freeform.yaml
    │   │   │   ├── mix_evals_audio2_text_freeform_hard.yaml
    │   │   │   ├── mix_evals_audio2text.yaml
    │   │   │   ├── mix_evals_audio2text_hard.yaml
    │   │   │   └── utils.py
    │   │   ├── image2text
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── mix_evals_image2text.yaml
    │   │   │   ├── mix_evals_image2text_freeform.yaml
    │   │   │   ├── mix_evals_image2text_freeform_hard.yaml
    │   │   │   ├── mix_evals_image2text_hard.yaml
    │   │   │   ├── mix_evals_image2text_mc.yaml
    │   │   │   ├── mix_evals_image2text_mc_hard.yaml
    │   │   │   └── utils.py
    │   │   └── video2text
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── mix_evals_video2text.yaml
    │   │   │   ├── mix_evals_video2text_freeform.yaml
    │   │   │   ├── mix_evals_video2text_freeform_hard.yaml
    │   │   │   ├── mix_evals_video2text_hard.yaml
    │   │   │   ├── mix_evals_video2text_mc.yaml
    │   │   │   ├── mix_evals_video2text_mc_hard.yaml
    │   │   │   ├── mix_evals_video2text_openended.yaml
    │   │   │   └── utils.py
    │   ├── mlvu
    │   │   ├── mlvu_dev.yaml
    │   │   ├── mlvu_test.yaml
    │   │   └── utils.py
    │   ├── mmau
    │   │   ├── _default_template_yaml
    │   │   ├── mmau.yaml
    │   │   ├── mmau_test.yaml
    │   │   ├── mmau_test_mini.yaml
    │   │   └── utils.py
    │   ├── mmbench
    │   │   ├── _default_template_mmbench_cn_yaml
    │   │   ├── _default_template_mmbench_en_yaml
    │   │   ├── _default_template_mmbench_ko_yaml
    │   │   ├── _default_template_mmbench_ru_yaml
    │   │   ├── cc_utils.py
    │   │   ├── cn_utils.py
    │   │   ├── en_utils.py
    │   │   ├── ko_utils.py
    │   │   ├── mmbench.yaml
    │   │   ├── mmbench_cc.yaml
    │   │   ├── mmbench_cn.yaml
    │   │   ├── mmbench_cn_dev.yaml
    │   │   ├── mmbench_cn_dev_lite.yaml
    │   │   ├── mmbench_cn_test.yaml
    │   │   ├── mmbench_en.yaml
    │   │   ├── mmbench_en_dev.yaml
    │   │   ├── mmbench_en_dev_lite.yaml
    │   │   ├── mmbench_en_test.yaml
    │   │   ├── mmbench_evals.py
    │   │   ├── mmbench_ko_dev.yaml
    │   │   ├── mmbench_ru_dev.yaml
    │   │   └── ru_utils.py
    │   ├── mme
    │   │   ├── mme.yaml
    │   │   └── utils.py
    │   ├── mme_cot
    │   │   ├── README.md
    │   │   ├── mme_cot_direct.yaml
    │   │   ├── mme_cot_reason.yaml
    │   │   └── utils.py
    │   ├── mme_realworld
    │   │   ├── mme_realworld.yaml
    │   │   ├── mme_realworld_cn.yaml
    │   │   ├── mme_realworld_lite.yaml
    │   │   └── utils.py
    │   ├── mmlu
    │   │   ├── _generate_configs.py
    │   │   ├── continuation
    │   │   │   ├── _continuation_template_yaml
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   └── mmlu_world_religions.yaml
    │   │   ├── default
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── _mmlu_humanities.yaml
    │   │   │   ├── _mmlu_other.yaml
    │   │   │   ├── _mmlu_social_sciences.yaml
    │   │   │   ├── _mmlu_stem.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   └── mmlu_world_religions.yaml
    │   │   ├── flan_cot_fewshot
    │   │   │   ├── _cot_prompts.json
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── _mmlu_flan_cot_fewshot_template_yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   └── mmlu_world_religions.yaml
    │   │   ├── flan_cot_zeroshot
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── _mmlu_flan_cot_zeroshot_template_yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   └── utils.py
    │   │   ├── flan_n_shot
    │   │   │   ├── generative
    │   │   │   │   ├── _mmlu.yaml
    │   │   │   │   ├── _mmlu_flan_generative_template_yaml
    │   │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   │   ├── mmlu_international_law.yaml
    │   │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   │   ├── mmlu_management.yaml
    │   │   │   │   ├── mmlu_marketing.yaml
    │   │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   │   ├── mmlu_sociology.yaml
    │   │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   ├── mmlu_virology.yaml
    │   │   │   │   ├── mmlu_world_religions.yaml
    │   │   │   │   └── utils.py
    │   │   │   └── loglikelihood
    │   │   │   │   ├── _mmlu.yaml
    │   │   │   │   ├── _mmlu_flan_loglikelihood_template_yaml
    │   │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   │   ├── mmlu_international_law.yaml
    │   │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   │   ├── mmlu_management.yaml
    │   │   │   │   ├── mmlu_marketing.yaml
    │   │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   │   ├── mmlu_sociology.yaml
    │   │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   │   ├── mmlu_virology.yaml
    │   │   │   │   └── mmlu_world_religions.yaml
    │   │   └── generative
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── _mmlu.yaml
    │   │   │   ├── mmlu_abstract_algebra.yaml
    │   │   │   ├── mmlu_anatomy.yaml
    │   │   │   ├── mmlu_astronomy.yaml
    │   │   │   ├── mmlu_business_ethics.yaml
    │   │   │   ├── mmlu_clinical_knowledge.yaml
    │   │   │   ├── mmlu_college_biology.yaml
    │   │   │   ├── mmlu_college_chemistry.yaml
    │   │   │   ├── mmlu_college_computer_science.yaml
    │   │   │   ├── mmlu_college_mathematics.yaml
    │   │   │   ├── mmlu_college_medicine.yaml
    │   │   │   ├── mmlu_college_physics.yaml
    │   │   │   ├── mmlu_computer_security.yaml
    │   │   │   ├── mmlu_conceptual_physics.yaml
    │   │   │   ├── mmlu_econometrics.yaml
    │   │   │   ├── mmlu_electrical_engineering.yaml
    │   │   │   ├── mmlu_elementary_mathematics.yaml
    │   │   │   ├── mmlu_formal_logic.yaml
    │   │   │   ├── mmlu_global_facts.yaml
    │   │   │   ├── mmlu_high_school_biology.yaml
    │   │   │   ├── mmlu_high_school_chemistry.yaml
    │   │   │   ├── mmlu_high_school_computer_science.yaml
    │   │   │   ├── mmlu_high_school_european_history.yaml
    │   │   │   ├── mmlu_high_school_geography.yaml
    │   │   │   ├── mmlu_high_school_government_and_politics.yaml
    │   │   │   ├── mmlu_high_school_macroeconomics.yaml
    │   │   │   ├── mmlu_high_school_mathematics.yaml
    │   │   │   ├── mmlu_high_school_microeconomics.yaml
    │   │   │   ├── mmlu_high_school_physics.yaml
    │   │   │   ├── mmlu_high_school_psychology.yaml
    │   │   │   ├── mmlu_high_school_statistics.yaml
    │   │   │   ├── mmlu_high_school_us_history.yaml
    │   │   │   ├── mmlu_high_school_world_history.yaml
    │   │   │   ├── mmlu_human_aging.yaml
    │   │   │   ├── mmlu_human_sexuality.yaml
    │   │   │   ├── mmlu_international_law.yaml
    │   │   │   ├── mmlu_jurisprudence.yaml
    │   │   │   ├── mmlu_logical_fallacies.yaml
    │   │   │   ├── mmlu_machine_learning.yaml
    │   │   │   ├── mmlu_management.yaml
    │   │   │   ├── mmlu_marketing.yaml
    │   │   │   ├── mmlu_medical_genetics.yaml
    │   │   │   ├── mmlu_miscellaneous.yaml
    │   │   │   ├── mmlu_moral_disputes.yaml
    │   │   │   ├── mmlu_moral_scenarios.yaml
    │   │   │   ├── mmlu_nutrition.yaml
    │   │   │   ├── mmlu_philosophy.yaml
    │   │   │   ├── mmlu_prehistory.yaml
    │   │   │   ├── mmlu_professional_accounting.yaml
    │   │   │   ├── mmlu_professional_law.yaml
    │   │   │   ├── mmlu_professional_medicine.yaml
    │   │   │   ├── mmlu_professional_psychology.yaml
    │   │   │   ├── mmlu_public_relations.yaml
    │   │   │   ├── mmlu_security_studies.yaml
    │   │   │   ├── mmlu_sociology.yaml
    │   │   │   ├── mmlu_us_foreign_policy.yaml
    │   │   │   ├── mmlu_virology.yaml
    │   │   │   └── mmlu_world_religions.yaml
    │   ├── mmlu_pro
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── _mmlu_pro.yaml
    │   │   ├── mmlu_pro_biology.yaml
    │   │   ├── mmlu_pro_business.yaml
    │   │   ├── mmlu_pro_chemistry.yaml
    │   │   ├── mmlu_pro_computer_science.yaml
    │   │   ├── mmlu_pro_economics.yaml
    │   │   ├── mmlu_pro_engineering.yaml
    │   │   ├── mmlu_pro_health.yaml
    │   │   ├── mmlu_pro_history.yaml
    │   │   ├── mmlu_pro_law.yaml
    │   │   ├── mmlu_pro_math.yaml
    │   │   ├── mmlu_pro_other.yaml
    │   │   ├── mmlu_pro_philosophy.yaml
    │   │   ├── mmlu_pro_physics.yaml
    │   │   ├── mmlu_pro_psychology.yaml
    │   │   └── utils.py
    │   ├── mmmu
    │   │   ├── _default_template_yaml
    │   │   ├── arial.ttf
    │   │   ├── mmmu.yaml
    │   │   ├── mmmu_group_img.yaml
    │   │   ├── mmmu_group_img_test.yaml
    │   │   ├── mmmu_group_img_val.yaml
    │   │   ├── mmmu_test.yaml
    │   │   ├── mmmu_val.yaml
    │   │   ├── mmmu_val_pass64.yaml
    │   │   ├── mmmu_val_thinking.yaml
    │   │   ├── utils.py
    │   │   └── utils_group_img.py
    │   ├── mmmu_pro
    │   │   ├── _default_template_yaml
    │   │   ├── mmmu_pro.yaml
    │   │   ├── mmmu_pro_composite.yaml
    │   │   ├── mmmu_pro_composite_cot.yaml
    │   │   ├── mmmu_pro_cot.yaml
    │   │   ├── mmmu_pro_standard.yaml
    │   │   ├── mmmu_pro_standard_cot.yaml
    │   │   ├── mmmu_pro_vision.yaml
    │   │   ├── mmmu_pro_vision_cot.yaml
    │   │   └── utils.py
    │   ├── mmsearch
    │   │   ├── constants.py
    │   │   ├── get_final_scores.py
    │   │   ├── lmms_eval_utils.py
    │   │   ├── mmsearch.yaml
    │   │   ├── mmsearch_end2end.yaml
    │   │   ├── mmsearch_rerank.yaml
    │   │   ├── mmsearch_summarization.yaml
    │   │   ├── prompts
    │   │   │   ├── prompt.py
    │   │   │   └── prompt_w_imagesearch.py
    │   │   ├── retrieve_content
    │   │   │   ├── retriever.py
    │   │   │   └── tokenization
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── tokenizers.py
    │   │   │   │   └── utils.py
    │   │   ├── score
    │   │   │   ├── f1_score.py
    │   │   │   ├── req_score.py
    │   │   │   └── result_summary.py
    │   │   └── utils
    │   │   │   ├── image_utils.py
    │   │   │   ├── lmms_eval_utils.py
    │   │   │   ├── prompt_utils.py
    │   │   │   ├── utils.py
    │   │   │   └── web_content_utils.py
    │   ├── mmstar
    │   │   ├── ko_utils.py
    │   │   ├── mmstar.yaml
    │   │   ├── mmstar_ko.yaml
    │   │   └── utils.py
    │   ├── mmt
    │   │   ├── _default_template_yaml
    │   │   ├── mmt.yaml
    │   │   ├── mmt_mi.yaml
    │   │   ├── mmt_mi_test.yaml
    │   │   ├── mmt_mi_val.yaml
    │   │   ├── mmt_test.yaml
    │   │   ├── mmt_val.yaml
    │   │   └── utils.py
    │   ├── mmupd
    │   │   ├── _default_template_mmupd_yaml
    │   │   ├── mmaad_base.yaml
    │   │   ├── mmaad_instruction.yaml
    │   │   ├── mmaad_option.yaml
    │   │   ├── mmiasd_base.yaml
    │   │   ├── mmiasd_instruction.yaml
    │   │   ├── mmiasd_option.yaml
    │   │   ├── mmivqd_base.yaml
    │   │   ├── mmivqd_instruction.yaml
    │   │   ├── mmivqd_option.yaml
    │   │   ├── mmupd.yaml
    │   │   ├── mmupd_base.yaml
    │   │   ├── mmupd_evals.py
    │   │   ├── mmupd_instruction.yaml
    │   │   ├── mmupd_option.yaml
    │   │   └── utils.py
    │   ├── mmvet
    │   │   ├── mmvet.yaml
    │   │   └── utils.py
    │   ├── mmvetv2
    │   │   ├── mmvetv2.yaml
    │   │   ├── mmvetv2_group_img.yaml
    │   │   └── utils.py
    │   ├── mmworld
    │   │   ├── mmworld.yaml
    │   │   └── utils.py
    │   ├── moviechat
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── moviechat_breakpoint.yaml
    │   │   ├── moviechat_global.yaml
    │   │   └── utils.py
    │   ├── muchomusic
    │   │   ├── muchomusic.yaml
    │   │   └── utils.py
    │   ├── muirbench
    │   │   ├── muirbench.yaml
    │   │   └── utils.py
    │   ├── multidocvqa
    │   │   ├── multidocvqa.yaml
    │   │   ├── multidocvqa_test.yaml
    │   │   ├── multidocvqa_val.yaml
    │   │   └── utils.py
    │   ├── multilingual-llava-bench-in-the-wild
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── arabic_llava_in_the_wild.yaml
    │   │   ├── bengali_llava_in_the_wild.yaml
    │   │   ├── chinese_llava_in_the_wild.yaml
    │   │   ├── french_llava_in_the_wild.yaml
    │   │   ├── hindi_llava_in_the_wild.yaml
    │   │   ├── japanese_llava_in_the_wild.yaml
    │   │   ├── rule.json
    │   │   ├── russian_llava_in_the_wild.yaml
    │   │   ├── spanish_llava_in_the_wild.yaml
    │   │   ├── urdu_llava_in_the_wild.yaml
    │   │   └── utils.py
    │   ├── multimodal_rewardbench
    │   │   ├── multimodal_rewardbench.yaml
    │   │   └── utils.py
    │   ├── mvbench
    │   │   ├── _default_template_yaml
    │   │   ├── mvbench.yaml
    │   │   ├── mvbench_action_antonym.yaml
    │   │   ├── mvbench_action_count.yaml
    │   │   ├── mvbench_action_localization.yaml
    │   │   ├── mvbench_action_prediction.yaml
    │   │   ├── mvbench_action_sequence.yaml
    │   │   ├── mvbench_character_order.yaml
    │   │   ├── mvbench_counterfactual_inference.yaml
    │   │   ├── mvbench_egocentric_navigation.yaml
    │   │   ├── mvbench_episodic_reasoning.yaml
    │   │   ├── mvbench_fine_grained_action.yaml
    │   │   ├── mvbench_fine_grained_pose.yaml
    │   │   ├── mvbench_moving_attribute.yaml
    │   │   ├── mvbench_moving_count.yaml
    │   │   ├── mvbench_moving_direction.yaml
    │   │   ├── mvbench_object_existence.yaml
    │   │   ├── mvbench_object_interaction.yaml
    │   │   ├── mvbench_object_shuffle.yaml
    │   │   ├── mvbench_scene_transition.yaml
    │   │   ├── mvbench_state_change.yaml
    │   │   ├── mvbench_unexpected_action.yaml
    │   │   └── utils.py
    │   ├── naturalbench
    │   │   ├── naturalbench.yaml
    │   │   └── utils.py
    │   ├── nextqa
    │   │   ├── _default_template_yaml
    │   │   ├── nextqa.yaml
    │   │   ├── nextqa_mc_test.yaml
    │   │   ├── nextqa_oe_test.yaml
    │   │   ├── nextqa_oe_val.yaml
    │   │   ├── stopwords.csv
    │   │   └── utils.py
    │   ├── nocaps
    │   │   ├── _default_template_nocaps_yaml
    │   │   ├── nocaps.yaml
    │   │   ├── nocaps_test.yaml
    │   │   ├── nocaps_val.yaml
    │   │   ├── nocaps_val_lite.yaml
    │   │   └── utils.py
    │   ├── ocrbench
    │   │   ├── ocrbench.yaml
    │   │   ├── upload_ocrbench.py
    │   │   └── utils.py
    │   ├── ocrbench_v2
    │   │   ├── IoUscore_metric.py
    │   │   ├── TEDS_metric.py
    │   │   ├── __init__.py
    │   │   ├── ocrbench_v2.yaml
    │   │   ├── page_ocr_metric.py
    │   │   ├── parallel.py
    │   │   ├── spotting_eval
    │   │   │   ├── __init__.py
    │   │   │   ├── readme.txt
    │   │   │   ├── rrc_evaluation_funcs_1_1.py
    │   │   │   └── script.py
    │   │   ├── spotting_metric.py
    │   │   ├── upload_ocrbench_v2.py
    │   │   ├── utils.py
    │   │   └── vqa_metric.py
    │   ├── ok_vqa
    │   │   ├── _default_template_vqa_yaml
    │   │   ├── _generate_config.py
    │   │   ├── _ok_vqa.yaml
    │   │   ├── ok_vqa_val2014.yaml
    │   │   ├── ok_vqa_val2014_lite.yaml
    │   │   └── utils.py
    │   ├── olympiadbench
    │   │   ├── cn_utils.py
    │   │   ├── en_utils.py
    │   │   ├── olympiadbench.yaml
    │   │   ├── olympiadbench_evals.py
    │   │   ├── olympiadbench_test_cn.yaml
    │   │   ├── olympiadbench_test_en.yaml
    │   │   ├── olympiadbench_testmini.yaml
    │   │   └── testmini_utils.py
    │   ├── omni_bench
    │   │   ├── _default_template_yaml
    │   │   ├── omni_bench.yaml
    │   │   ├── omni_bench_audio_transcript.yaml
    │   │   ├── omni_bench_image_caption.yaml
    │   │   └── utils.py
    │   ├── open_asr
    │   │   ├── _default_template_yaml
    │   │   ├── openasr.yaml
    │   │   ├── openasr_ami.yaml
    │   │   ├── openasr_common_voice.yaml
    │   │   ├── openasr_earnings22.yaml
    │   │   ├── openasr_gigaspeech.yaml
    │   │   ├── openasr_librispeech.yaml
    │   │   ├── openasr_librispeech_test_clean.yaml
    │   │   ├── openasr_librispeech_test_other.yaml
    │   │   ├── openasr_spgispeech.yaml
    │   │   ├── openasr_tedlium.yaml
    │   │   ├── openasr_voxpopuli.yaml
    │   │   └── utils.py
    │   ├── openhermes
    │   │   ├── openhermes.yaml
    │   │   └── utils.py
    │   ├── people_speech
    │   │   ├── people_speech_val.yaml
    │   │   └── utils.py
    │   ├── perceptiontest
    │   │   ├── test
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── perceptiontest_mc.yaml
    │   │   │   ├── perceptiontest_mcppl.yaml
    │   │   │   └── utils.py
    │   │   └── val
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── perceptiontest_mc.yaml
    │   │   │   ├── perceptiontest_mcppl.yaml
    │   │   │   └── utils.py
    │   ├── plm_videobench
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── eval_utils.py
    │   │   ├── fgqa
    │   │   │   ├── fgqa_test.yaml
    │   │   │   └── fgqa_utils.py
    │   │   ├── rcap
    │   │   │   ├── rcap_test.yaml
    │   │   │   └── rcap_utils.py
    │   │   ├── rdcap
    │   │   │   ├── rdcap_test.yaml
    │   │   │   └── rdcap_utils.py
    │   │   ├── rtloc
    │   │   │   ├── rtloc_test.yaml
    │   │   │   └── rtloc_utils.py
    │   │   └── sgqa
    │   │   │   ├── sgqa_test.yaml
    │   │   │   └── sgqa_utils.py
    │   ├── pope
    │   │   ├── pope.yaml
    │   │   ├── pope_adv.yaml
    │   │   ├── pope_full.yaml
    │   │   ├── pope_pop.yaml
    │   │   ├── pope_random.yaml
    │   │   └── utils.py
    │   ├── qbench
    │   │   ├── abench_dev.yaml
    │   │   ├── qbench2_dev.yaml
    │   │   ├── qbench_dev.yaml
    │   │   ├── qbenchs_dev.yaml
    │   │   └── utils.py
    │   ├── realworldqa
    │   │   ├── realworldqa.yaml
    │   │   └── utils.py
    │   ├── refcoco+
    │   │   ├── _default_template_bbox_rec_yaml
    │   │   ├── _default_template_bbox_yaml
    │   │   ├── _default_template_seg_yaml
    │   │   ├── _generate_config.py
    │   │   ├── _refcoco.yaml
    │   │   ├── refcoco+_bbox_rec_testA.yaml
    │   │   ├── refcoco+_bbox_rec_testB.yaml
    │   │   ├── refcoco+_bbox_rec_val.yaml
    │   │   ├── refcoco+_bbox_testA.yaml
    │   │   ├── refcoco+_bbox_testB.yaml
    │   │   ├── refcoco+_bbox_val.yaml
    │   │   ├── refcoco+_seg_testA.yaml
    │   │   ├── refcoco+_seg_testB.yaml
    │   │   ├── refcoco+_seg_val.yaml
    │   │   ├── utils.py
    │   │   └── utils_rec.py
    │   ├── refcoco
    │   │   ├── _default_template_bbox_rec_yaml
    │   │   ├── _default_template_bbox_yaml
    │   │   ├── _default_template_seg_yaml
    │   │   ├── _generate_config.py
    │   │   ├── _refcoco.yaml
    │   │   ├── refcoco_bbox_rec_test.yaml
    │   │   ├── refcoco_bbox_rec_testA.yaml
    │   │   ├── refcoco_bbox_rec_testB.yaml
    │   │   ├── refcoco_bbox_rec_val.yaml
    │   │   ├── refcoco_bbox_test.yaml
    │   │   ├── refcoco_bbox_testA.yaml
    │   │   ├── refcoco_bbox_testB.yaml
    │   │   ├── refcoco_bbox_val.yaml
    │   │   ├── refcoco_bbox_val_lite.yaml
    │   │   ├── refcoco_seg_test.yaml
    │   │   ├── refcoco_seg_testA.yaml
    │   │   ├── refcoco_seg_testB.yaml
    │   │   ├── refcoco_seg_val.yaml
    │   │   ├── utils.py
    │   │   └── utils_rec.py
    │   ├── refcocog
    │   │   ├── _default_template_bbox_rec_yaml
    │   │   ├── _default_template_bbox_yaml
    │   │   ├── _default_template_seg_yaml
    │   │   ├── _generate_config.py
    │   │   ├── _refcoco.yaml
    │   │   ├── refcocog_bbox_rec_test.yaml
    │   │   ├── refcocog_bbox_rec_val.yaml
    │   │   ├── refcocog_bbox_test.yaml
    │   │   ├── refcocog_bbox_val.yaml
    │   │   ├── refcocog_seg_test.yaml
    │   │   ├── refcocog_seg_val.yaml
    │   │   ├── utils.py
    │   │   └── utils_rec.py
    │   ├── scienceqa
    │   │   ├── scienceqa.yaml
    │   │   ├── scienceqa_full.yaml
    │   │   ├── scienceqa_img.yaml
    │   │   └── utils.py
    │   ├── screenspot
    │   │   ├── README.md
    │   │   ├── _default_template_rec_yaml
    │   │   ├── _default_template_reg_yaml
    │   │   ├── _screenspot.yaml
    │   │   ├── screenspot_rec_test.yaml
    │   │   ├── screenspot_reg_test.yaml
    │   │   ├── utils.py
    │   │   └── utils_rec.py
    │   ├── seedbench
    │   │   ├── ko_utils.py
    │   │   ├── seedbench.yaml
    │   │   ├── seedbench_ko.yaml
    │   │   ├── seedbench_lite.yaml
    │   │   ├── seedbench_ppl.yaml
    │   │   └── utils.py
    │   ├── seedbench_2
    │   │   ├── seedbench_2.yaml
    │   │   └── utils.py
    │   ├── seedbench_2_plus
    │   │   ├── seedbench_2_plus.yaml
    │   │   └── utils.py
    │   ├── stvqa
    │   │   ├── stvqa.yaml
    │   │   └── utils.py
    │   ├── synthdog
    │   │   ├── donut_evaluator.py
    │   │   ├── synthdog.yaml
    │   │   ├── synthdog_en.yaml
    │   │   ├── synthdog_zh.yaml
    │   │   └── utils.py
    │   ├── tedlium
    │   │   ├── tedlium_dev_test.yaml
    │   │   ├── tedlium_long_form.yaml
    │   │   └── utils.py
    │   ├── tempcompass
    │   │   ├── _default_template_yaml
    │   │   ├── _tempcompass.yaml
    │   │   ├── tempcompass_caption_matching.yaml
    │   │   ├── tempcompass_captioning.yaml
    │   │   ├── tempcompass_mc.yaml
    │   │   ├── tempcompass_yes_no.yaml
    │   │   └── utils.py
    │   ├── temporalbench
    │   │   ├── temporalbench.yaml
    │   │   ├── temporalbench_long_qa.yaml
    │   │   ├── temporalbench_short_caption.yaml
    │   │   ├── temporalbench_short_qa.yaml
    │   │   └── utils.py
    │   ├── textcaps
    │   │   ├── _default_template_textcaps_yaml
    │   │   ├── textcaps.yaml
    │   │   ├── textcaps_test.yaml
    │   │   ├── textcaps_train.yaml
    │   │   ├── textcaps_val.yaml
    │   │   ├── textcaps_val_lite.yaml
    │   │   └── utils.py
    │   ├── textvqa
    │   │   ├── _default_template_textvqa_yaml
    │   │   ├── _textvqa.yaml
    │   │   ├── textvqa_test.yaml
    │   │   ├── textvqa_val.yaml
    │   │   ├── textvqa_val_lite.yaml
    │   │   └── utils.py
    │   ├── vatex
    │   │   ├── _vatex.yaml
    │   │   ├── utils.py
    │   │   ├── vatex_test.yaml
    │   │   └── vatex_val_zh.yaml
    │   ├── vcr_wiki
    │   │   ├── _default_template_vcr_yaml
    │   │   ├── utils.py
    │   │   ├── vcr_wiki_en_easy.yaml
    │   │   ├── vcr_wiki_en_easy_100.yaml
    │   │   ├── vcr_wiki_en_easy_500.yaml
    │   │   ├── vcr_wiki_en_hard.yaml
    │   │   ├── vcr_wiki_en_hard_100.yaml
    │   │   ├── vcr_wiki_en_hard_500.yaml
    │   │   ├── vcr_wiki_zh_easy.yaml
    │   │   ├── vcr_wiki_zh_easy_100.yaml
    │   │   ├── vcr_wiki_zh_easy_500.yaml
    │   │   ├── vcr_wiki_zh_hard.yaml
    │   │   ├── vcr_wiki_zh_hard_100.yaml
    │   │   └── vcr_wiki_zh_hard_500.yaml
    │   ├── vdc
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── background_test.yaml
    │   │   ├── camera_test.yaml
    │   │   ├── detailed_test.yaml
    │   │   ├── main_object_test.yaml
    │   │   ├── short_test.yaml
    │   │   └── utils.py
    │   ├── vibe_eval
    │   │   ├── utils.py
    │   │   └── vibe_eval.yaml
    │   ├── video_detail_description
    │   │   ├── README.md
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   └── video_detail_description.yaml
    │   ├── videochatgpt
    │   │   ├── _default_template_yaml
    │   │   ├── _videochatgpt.yaml
    │   │   ├── utils.py
    │   │   ├── videochatgpt_consistency.yaml
    │   │   ├── videochatgpt_generic.yaml
    │   │   └── videochatgpt_temporal.yaml
    │   ├── videoevalpro
    │   │   ├── utils.py
    │   │   └── videoevalpro.yaml
    │   ├── videomathqa
    │   │   ├── README.md
    │   │   ├── cot_postprocess.py
    │   │   ├── cot_step_evaluation.py
    │   │   ├── utils.py
    │   │   ├── videomathqa_mbin.yaml
    │   │   ├── videomathqa_mbin_cot.yaml
    │   │   ├── videomathqa_mbin_cot_w_subtitle.yaml
    │   │   ├── videomathqa_mbin_w_subtitle.yaml
    │   │   ├── videomathqa_mcq.yaml
    │   │   ├── videomathqa_mcq_cot.yaml
    │   │   ├── videomathqa_mcq_cot_w_subtitle.yaml
    │   │   └── videomathqa_mcq_w_subtitle.yaml
    │   ├── videomme
    │   │   ├── utils.py
    │   │   ├── videomme.yaml
    │   │   └── videomme_w_subtitle.yaml
    │   ├── videommmu
    │   │   ├── _default_template_yaml
    │   │   ├── adaptation.yaml
    │   │   ├── adaptation_question_only.yaml
    │   │   ├── comprehension.yaml
    │   │   ├── perception.yaml
    │   │   ├── utils.py
    │   │   └── video_mmmu.yaml
    │   ├── vinoground
    │   │   ├── utils.py
    │   │   └── vinoground.yaml
    │   ├── vitatecs
    │   │   ├── _default_template_yaml
    │   │   ├── _vitatecs.yaml
    │   │   ├── utils.py
    │   │   ├── vitatecs_compositionality.yaml
    │   │   ├── vitatecs_direction.yaml
    │   │   ├── vitatecs_intensity.yaml
    │   │   ├── vitatecs_localization.yaml
    │   │   ├── vitatecs_sequence.yaml
    │   │   └── vitatecs_type.yaml
    │   ├── vizwiz_vqa
    │   │   ├── _default_template_vqa_yaml
    │   │   ├── _generate_config.py
    │   │   ├── _vizwiz_vqa.yaml
    │   │   ├── utils.py
    │   │   ├── vizwiz_vqa_test.yaml
    │   │   ├── vizwiz_vqa_val.yaml
    │   │   └── vizwiz_vqa_val_lite.yaml
    │   ├── vl_rewardbench
    │   │   ├── utils.py
    │   │   └── vl_rewardbench.yaml
    │   ├── vlmsareblind
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── vlmsareblind.yaml
    │   │   └── vlmsareblind_lite.yaml
    │   ├── vmcbench
    │   │   ├── utils.py
    │   │   └── vmcbench.yaml
    │   ├── vocalsound
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   ├── vocalsound_test.yaml
    │   │   └── vocalsound_val.yaml
    │   ├── vqav2
    │   │   ├── _default_template_vqav2_yaml
    │   │   ├── _vqav2.yaml
    │   │   ├── utils.py
    │   │   ├── vqav2_test.yaml
    │   │   ├── vqav2_val.yaml
    │   │   └── vqav2_val_lite.yaml
    │   ├── vsibench
    │   │   ├── utils.py
    │   │   └── vsibench.yaml
    │   ├── vstar_bench
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   ├── vstar_bench.yaml
    │   │   ├── vstar_bench_direct_attributes.yaml
    │   │   └── vstar_bench_relative_position.yaml
    │   ├── wavcaps
    │   │   ├── utils.py
    │   │   └── wavcaps.yaml
    │   ├── websrc
    │   │   ├── README.md
    │   │   ├── utils.py
    │   │   ├── websrc.yaml
    │   │   ├── websrc_test.yaml
    │   │   └── websrc_val.yaml
    │   ├── wild_vision_bench
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   ├── wild_vision_bench0617.yaml
    │   │   ├── wild_vision_bench0630.yaml
    │   │   └── wildvision_bench.yaml
    │   ├── worldqa
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   ├── worldqa.yaml
    │   │   ├── worldqa_generation.yaml
    │   │   ├── worldqa_mc.yaml
    │   │   ├── worldqa_mc_evaluator.py
    │   │   └── worldqa_mcppl.yaml
    │   ├── worldsense
    │   │   ├── utils.py
    │   │   ├── worldsense.yaml
    │   │   └── worldsense_w_subtitle.yaml
    │   ├── xlrs
    │   │   ├── XLRS-lite.yaml
    │   │   └── mcq_utils.py
    │   └── youcook2
    │   │   ├── _default_template_yaml
    │   │   ├── utils.py
    │   │   └── youcook2_val.yaml
    └── utils.py
├── miscs
    ├── example_eval.yaml
    ├── llava_repr_requirements.txt
    ├── llava_result_check.md
    ├── llava_sglang_result_check.md
    ├── repr_torch_envs.txt
    ├── scienceqa_id.txt
    ├── script.sh
    ├── test_llava.py
    ├── test_scienceqa.py
    └── tinyllava_repr_requirements.txt
├── pyproject.toml
├── setup.py
└── tools
    ├── get_split_zip.py
    ├── get_video_avg_time.py
    ├── lite
        ├── embed.py
        ├── embedder
        │   ├── BaseEmbedder.py
        │   ├── ClipBgeEmbedder.py
        │   └── __init__.py
        ├── shrink.py
        └── shrinker
        │   ├── BaseShrinker.py
        │   ├── EmbedShrinker.py
        │   ├── __init__.py
        │   └── sampling_methods
        │       ├── __init__.py
        │       ├── kcenter_greedy.py
        │       └── sampling_def.py
    ├── live_bench
        ├── create_dataset.py
        ├── data_summary.ipynb
        ├── example.ipynb
        ├── filter.ipynb
        ├── live_bench
        │   ├── __init__.py
        │   ├── api
        │   │   └── live_bench.py
        │   ├── data_generator
        │   │   ├── __init__.py
        │   │   ├── check_prompt.md
        │   │   ├── default_criteria.md
        │   │   ├── example
        │   │   │   ├── example_output.json
        │   │   │   └── example_website.png
        │   │   ├── live_bench.py
        │   │   ├── live_bench_data.py
        │   │   ├── prompt.md
        │   │   ├── qa_generator.py
        │   │   ├── question_finalizer.py
        │   │   ├── response.py
        │   │   ├── score_getter.py
        │   │   ├── score_prompt.md
        │   │   └── utils
        │   │   │   ├── __init__.py
        │   │   │   ├── claude.py
        │   │   │   ├── extract_information.py
        │   │   │   ├── gemini.py
        │   │   │   └── gpt4v.py
        │   ├── driver
        │   │   ├── .gitignore
        │   │   ├── __init__.py
        │   │   └── load_driver.py
        │   ├── screen_shoter
        │   │   ├── __init__.py
        │   │   ├── screen.py
        │   │   └── screen_shoter.py
        │   ├── view.ipynb
        │   └── websites
        │   │   ├── __init__.py
        │   │   ├── load_website.py
        │   │   ├── website.py
        │   │   └── website_list.yaml
        ├── pyproject.toml
        ├── refine_all_results.py
        ├── script
        │   ├── README.md
        │   ├── change.ipynb
        │   ├── compare.ipynb
        │   ├── modify.ipynb
        │   ├── refractor.py
        │   ├── select.ipynb
        │   ├── update_banchmark.ipynb
        │   └── upload_results.py
        ├── setup.py
        └── summerize.ipynb
    ├── make_audio_hf_dataset.ipynb
    ├── make_image_hf_dataset.ipynb
    ├── make_vatex.py
    ├── make_video_hf_dataset.ipynb
    ├── make_video_hf_dataset_from_json.py
    ├── makecvrr.ipynb
    └── regression.py


/.github/issue_template.md:
--------------------------------------------------------------------------------
1 | Before you open an issue, please check if a similar issue already exists or has been closed before.
2 | 
3 | ### When you open an issue, please be sure to include the following
4 | 
5 | - [ ] A descriptive title: [xxx] XXXX
6 | - [ ] A detailed description
7 | 
8 | Thank you for your contributions!
9 | 


--------------------------------------------------------------------------------
/examples/models/aria.sh:
--------------------------------------------------------------------------------
1 | export HF_HOME="$HOME/.cache/huggingface"  # use $HOME, not "~": a tilde inside double quotes is not expanded by the shell
2 | # Install lmms-eval first: pip install git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git
3 | 
4 | accelerate launch --num_processes=8 --main_process_port 12348 -m lmms_eval \
5 |     --model aria \
6 |     --model_args pretrained=rhymes-ai/Aria \
7 |     --tasks ai2d,chartqa,docvqa_val,mmmu_pro \
8 |     --batch_size 1


--------------------------------------------------------------------------------
/examples/models/xai_grok.sh:
--------------------------------------------------------------------------------
 1 | export HF_HOME="$HOME/.cache/huggingface"  # use $HOME, not "~": a tilde inside double quotes is not expanded by the shell
 2 | export OPENAI_API_KEY="xai-xxxxxxxxxx"  # placeholder value: replace with a real API key before running
 3 | export OPENAI_API_BASE="https://api.x.ai/v1"  # endpoint used by the openai_compatible model selected below
 4 | 
 5 | # Run the evaluation through the openai_compatible model with the Grok vision model version.
 6 | python3 -m lmms_eval \
 7 |     --model openai_compatible \
 8 |     --model_args model_version=grok-2-vision-1212 \
 9 |     --tasks ai2d,chartqa,docvqa_val,mathvista_testmini,mmmu_pro \
10 |     --batch_size 1


--------------------------------------------------------------------------------
/lmms_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/api/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/caching/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/caching/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/loggers/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation_tracker import EvaluationTracker
2 | from .wandb_logger import WandbLogger
3 | 


--------------------------------------------------------------------------------
/lmms_eval/models/model_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/models/model_utils/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import VideoChatGPTLlamaForCausalLM
2 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/constants.py:
--------------------------------------------------------------------------------
 1 | CONTROLLER_HEART_BEAT_EXPIRATION = 30  # NOTE(review): presumably seconds; confirm against the code that reads it
 2 | WORKER_HEART_BEAT_INTERVAL = 15  # NOTE(review): presumably seconds; confirm against the code that reads it
 3 | 
 4 | LOGDIR = "."  # relative path; resolves against the process working directory
 5 | 
 6 | 
 7 | # Video-related token strings. NOTE(review): how the model consumes them is not visible in this file.
 8 | DEFAULT_VIDEO_TOKEN = "<video>"
 9 | DEFAULT_VIDEO_PATCH_TOKEN = "<vid_patch>"
10 | DEFAULT_VID_START_TOKEN = "<vid_start>"
11 | DEFAULT_VID_END_TOKEN = "<vid_end>"
12 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/models/video_chatgpt/eval/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/model/__init__.py:
--------------------------------------------------------------------------------
1 | from lmms_eval.models.video_chatgpt.model.video_chatgpt import (
2 |     VideoChatGPTConfig,
3 |     VideoChatGPTLlamaForCausalLM,
4 | )
5 | 


--------------------------------------------------------------------------------
/lmms_eval/models/video_chatgpt/utils.py:
--------------------------------------------------------------------------------
1 | def disable_torch_init():
2 |     """
3 |     Disable the redundant torch default initialization to accelerate model creation.
4 |     """
5 |     import torch  # imported inside the function, so torch is only loaded when this is called
6 |     # The patches below are set on the classes themselves, so they affect every Linear/LayerNorm in the process.
7 |     setattr(torch.nn.Linear, "reset_parameters", lambda self: None)  # reset_parameters becomes a no-op
8 |     setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)  # same no-op for LayerNorm
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/file_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
4 | def generate_submission_file(file_name, args, subpath="submissions"):  # returns the absolute path of <args.output_path>/<subpath>/<file_name>
5 |     path = os.path.join(args.output_path, subpath)  # only args.output_path is read from args
6 |     os.makedirs(path, exist_ok=True)  # creates the directory (and parents) if missing; the file itself is not created
7 |     path = os.path.join(path, file_name)
8 |     return os.path.abspath(path)  # made absolute relative to the current working directory
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/_task_utils/gpt_eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/tasks/_task_utils/gpt_eval_utils.py


--------------------------------------------------------------------------------
/lmms_eval/tasks/air_bench/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/AIR_Bench
2 | dataset_kwargs:
3 |   token: True
4 | 
5 | metadata:
6 |   gpt_eval_model_name: gpt-4o
7 |   version: 0.0
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/air_bench/air_bench_chat.yaml:
--------------------------------------------------------------------------------
1 | group: air_bench_chat
2 | task:
3 |   - air_bench_chat_sound
4 |   - air_bench_chat_music
5 |   - air_bench_chat_speech
6 |   - air_bench_chat_mixed
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/air_bench/air_bench_foundation.yaml:
--------------------------------------------------------------------------------
1 | group: air_bench_foundation
2 | task:
3 |   - air_bench_foundation_sound
4 |   - air_bench_foundation_music
5 |   - air_bench_foundation_speech
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/arc/arc_challenge.yaml:
--------------------------------------------------------------------------------
1 | include: arc_easy.yaml
2 | task: arc_challenge
3 | dataset_name: ARC-Challenge
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_OCR.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_OCR
4 | dataset_name: OCR
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_action
4 | dataset_name: action
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_camera_angle.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_camera_angle
4 | dataset_name: camera_angle
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_camera_movement.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_camera_movement
4 | dataset_name: camera_movement
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_character_identification.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_character_identification
4 | dataset_name: character_identification
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_dynamic_object_number.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_dynamic_object_number
4 | dataset_name: dynamic_object_number
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_event.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_event
4 | dataset_name: event
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_object_category.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_object_category
4 | dataset_name: object_category
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_object_color.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_object_color
4 | dataset_name: object_color
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_object_number.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_object_number
4 | dataset_name: object_number
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_scene.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_scene
4 | dataset_name: scene
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_spatial_relation.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_spatial_relation
4 | dataset_name: spatial_relation
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/capability/capability_style.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | 
3 | task: capability_style
4 | dataset_name: style
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/clotho_aqa/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ClothoAQA
 2 | dataset_kwargs:
 3 |   token: True
 4 | doc_to_target: "answer"
 5 | doc_to_visual: !function utils.clotho_aqa_doc_to_audio
 6 | doc_to_text: !function utils.clotho_aqa_doc_to_text
 7 | 
 8 | metadata:
 9 |   gpt_eval_model_name: gpt-4o
10 |   version: 0.0
11 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/clotho_aqa/clotho_aqa.yaml:
--------------------------------------------------------------------------------
1 | group: clotho_aqa
2 | task:
3 |   - clotho_aqa_val
4 |   - clotho_aqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/cmmmu/_cmmmu.yaml:
--------------------------------------------------------------------------------
1 | group: cmmmu
2 | task:
3 | - cmmmu_val
4 | - cmmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/CMMMU
2 | output_type: generate_until
3 | doc_to_visual: !function utils.cmmmu_doc_to_visual
4 | doc_to_text: !function utils.cmmmu_doc_to_text
5 | doc_to_target: "answer"
6 | generation_kwargs:
7 |   max_new_tokens: 16
8 |   image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco2014_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2014_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco2017_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2017_cap
2 | task:
3 |   - coco2017_cap_val
4 |   - coco2017_cap_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test
5 |   - coco2017_cap_val
6 |   - coco2017_cap_test
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/coco_cap/coco_karpathy.yaml:
--------------------------------------------------------------------------------
1 | group : coco_karpathy
2 | task:
3 |   - coco_karpathy_val
4 |   - coco_karpathy_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/common_voice_15/common_voice_15.yaml:
--------------------------------------------------------------------------------
1 | group: common_voice_15
2 | task:
3 | - common_voice_15_zh-CN
4 | - common_voice_15_en
5 | - common_voice_15_fr


--------------------------------------------------------------------------------
/lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml:
--------------------------------------------------------------------------------
 1 | task : "common_voice_15_en"
 2 | dataset_name: en
 3 | lmms_eval_specific_kwargs:
 4 |   default:
 5 |     pre_prompt: ""
 6 |     post_prompt: ""
 7 |   qwen2_audio:
 8 |     pre_prompt: ""
 9 |     post_prompt: " <|en|>"
10 | include : _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml:
--------------------------------------------------------------------------------
 1 | task : "common_voice_15_fr"
 2 | dataset_name: fr
 3 | lmms_eval_specific_kwargs:
 4 |   default:
 5 |     pre_prompt: ""
 6 |     post_prompt: ""
 7 |   qwen2_audio:
 8 |     pre_prompt: ""
 9 |     post_prompt: " <|fr|>"
10 | include : _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml:
--------------------------------------------------------------------------------
 1 | task : "common_voice_15_zh-CN"
 2 | dataset_name: zh-CN
 3 | lmms_eval_specific_kwargs:
 4 |   default:
 5 |     pre_prompt: ""
 6 |     post_prompt: ""
 7 |   qwen2_audio:
 8 |     pre_prompt: ""
 9 |     post_prompt: " <|zh|>"
10 | include : _default_template_yaml
11 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/covost2/covost2.yaml:
--------------------------------------------------------------------------------
1 | group: covost2
2 | task:
3 |   - covost2_en_zh
4 |   - covost2_zh_en


--------------------------------------------------------------------------------
/lmms_eval/tasks/covost2/covost2_en_zh.yaml:
--------------------------------------------------------------------------------
1 | group: covost2_en_zh
2 | task:
3 |   - covost2_en_zh_test
4 |   - covost2_en_zh_dev


--------------------------------------------------------------------------------
/lmms_eval/tasks/covost2/covost2_zh_en.yaml:
--------------------------------------------------------------------------------
1 | group: covost2_zh_en
2 | task:
3 |   - covost2_zh_en_test
4 |   - covost2_zh_en_dev


--------------------------------------------------------------------------------
/lmms_eval/tasks/covost2/covost2_zh_en_dev.yaml:
--------------------------------------------------------------------------------
1 | task: "covost2_zh_en_dev"
2 | include: _default_template_zh_en_yaml
3 | test_split: dev
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/covost2/covost2_zh_en_test.yaml:
--------------------------------------------------------------------------------
1 | task: "covost2_zh_en_test"
2 | include: _default_template_zh_en_yaml
3 | test_split: test
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/cuva/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: fesvhtr/CUVA_LMMs
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: cuva
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-4-0613"


--------------------------------------------------------------------------------
/lmms_eval/tasks/cuva/cuva.yaml:
--------------------------------------------------------------------------------
1 | group : cuva
2 | task:
3 | - cuva_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/cvrr/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/CVRR-ES
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: cvrr-es
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0125


--------------------------------------------------------------------------------
/lmms_eval/tasks/detailcaps/_default_template_detailcaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     prompt: "Describe this image in detail."


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa.yaml:
--------------------------------------------------------------------------------
1 | group: docvqa
2 | task:
3 | - docvqa_val
4 | - docvqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_test"
2 | test_split: test
3 | process_results: !function utils.docvqa_test_process_results
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.docvqa_test_aggregate_results
7 |     higher_is_better: true
8 | include: _default_template_docvqa_yaml
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/docvqa/docvqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_val"
2 | test_split: validation
3 | metric_list:
4 |   - metric: anls
5 |     aggregation: mean
6 |     higher_is_better: true
7 | include: _default_template_docvqa_yaml
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/egoschema/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/egoschema
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: egoschema
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms_eval/tasks/egothink/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: EgoLife-v1/Egothink
2 | dataset_kwargs:
3 |   token: True
4 | test_split: test
5 | metadata:
6 |   version: 0.0
7 |   gpt_eval_model_name: "gpt-4"


--------------------------------------------------------------------------------
/lmms_eval/tasks/fleurs/fleurs.yaml:
--------------------------------------------------------------------------------
1 | group: fleurs
2 | task:
3 | - fleurs_en
4 | - fleurs_cmn_hans_cn


--------------------------------------------------------------------------------
/lmms_eval/tasks/fleurs/fleurs_cmn_hans_cn.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: cmn_hans_cn
2 | task: fleurs_cmn_hans_cn
3 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/fleurs/fleurs_en.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: en_us
2 | include: _default_template_yaml
3 | task: fleurs_en


--------------------------------------------------------------------------------
/lmms_eval/tasks/fleurs/fleurs_yue_hant_hk.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: yue_hant_hk
2 | include: _default_template_yaml
3 | task: fleurs_yue_hant_hk


--------------------------------------------------------------------------------
/lmms_eval/tasks/flickr30k/flickr30k.yaml:
--------------------------------------------------------------------------------
1 | group: flickr30k
2 | task:
3 | - flickr30k_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/funqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: fesvhtr/FunQA_LMMs
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: funqa
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-4-0613"


--------------------------------------------------------------------------------
/lmms_eval/tasks/funqa/funqa.yaml:
--------------------------------------------------------------------------------
1 | group : funqa
2 | task:
3 | - funqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/gigaspeech/gigaspeech.yaml:
--------------------------------------------------------------------------------
1 | group: gigaspeech
2 | task:
3 |   - gigaspeech_dev
4 |   - gigaspeech_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_diamond_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_extended_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_n_shot_yaml
4 | task: gpqa_main_cot_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_diamond_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_extended_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_cot_zeroshot_yaml
4 | task: gpqa_main_cot_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_diamond_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_extended_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_generative_n_shot_yaml
4 | task: gpqa_main_generative_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_diamond_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_extended_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_n_shot_yaml
4 | task: gpqa_main_n_shot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_diamond
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_diamond_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_extended
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_extended_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml:
--------------------------------------------------------------------------------
1 | # Generated by _generate_configs.py
2 | dataset_name: gpqa_main
3 | include: _gpqa_zeroshot_yaml
4 | task: gpqa_main_zeroshot
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/hrbench/hrbench.yaml:
--------------------------------------------------------------------------------
1 | group: hrbench
2 | task:
3 |   - hrbench4k
4 |   - hrbench8k
5 | metadata:
6 |   version: 0.0
7 |   gpt_eval_model_name: "gpt-3.5-turbo"
8 |   max_workers: 1


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa.yaml:
--------------------------------------------------------------------------------
1 | group: iconqa
2 | task:
3 | - iconqa_val
4 | - iconqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_test"
2 | test_split: test
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/iconqa/iconqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_val"
2 | test_split: val
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml:
--------------------------------------------------------------------------------
1 | include: illusionvqa.yaml
2 | task: illusionvqa_comprehension
3 | dataset_path: csebuetnlp/illusionVQA-Comprehension


--------------------------------------------------------------------------------
/lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml:
--------------------------------------------------------------------------------
1 | include: illusionvqa.yaml
2 | task: illusionvqa_soft_localization
3 | dataset_path: csebuetnlp/illusionVQA-Soft-Localization


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa.yaml:
--------------------------------------------------------------------------------
1 | group: infovqa
2 | task:
3 | - infovqa_val
4 | - infovqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "infovqa_test"
 2 | test_split: test
 3 | output_type: generate_until
 4 | process_results: !function utils.infovqa_test_process_results
 5 | metric_list:
 6 |   - metric: submission
 7 |     aggregation: !function utils.infovqa_test_aggregate_results
 8 |     higher_is_better: true
 9 | include: _default_template_infovqa_yaml
10 |   


--------------------------------------------------------------------------------
/lmms_eval/tasks/infovqa/infovqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "infovqa_val"
2 | test_split: validation
3 | output_type: generate_until
4 | metric_list:
5 |   - metric: anls
6 |     aggregation: mean
7 |     higher_is_better: true
8 | include: _default_template_infovqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/internal_eval/_default_template_internal_eval_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     pre_prompt: ""
4 |     post_prompt: ""
5 | process_results_use_image: true
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/internal_eval/internal_eval.yaml:
--------------------------------------------------------------------------------
1 | group: internal_eval
2 | task:
3 | - d170_cn
4 | - d170_en
5 | - dc100_en
6 | - dc200_cn
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_accounting.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Accounting
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_accounting"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_agriculture.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Agriculture
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_agriculture"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_architecture_and_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Architecture_and_Engineering
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_architecture_and_engineering"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_basic_medical_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Basic_Medical_Science
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_basic_medical_science"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_biology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Biology
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_biology"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_chemistry.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Chemistry
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_chemistry"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_clinical_medicine.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Clinical_Medicine
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_clinical_medicine"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_computer_science.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Computer_Science
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_computer_science"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_design.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Design
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_design"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_diagnostics_and_laboratory_medicine.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Diagnostics_and_Laboratory_Medicine
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_diagnostics_and_laboratory_medicine"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_economics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Economics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_economics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_electronics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Electronics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_electronics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_energy_and_power.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Energy_and_Power
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_energy_and_power"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_finance.yaml:
--------------------------------------------------------------------------------
1 | 
2 | dataset_name: Finance
3 | tag: "jmmmu_culture_agnostic"
4 | task: "jmmmu_finance"
5 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_art.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_Art
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_art"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_heritage.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_Heritage
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_heritage"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_japanese_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Japanese_History
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_japanese_history"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_manage.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Manage
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_manage"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_marketing.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Marketing
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_marketing"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_materials.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Materials
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_materials"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_math.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Math
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_math"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_mechanical_engineering.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Mechanical_Engineering
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_mechanical_engineering"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_music.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Music
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_music"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_pharmacy.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Pharmacy
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_pharmacy"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_physics.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Physics
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_physics"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_psychology.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Psychology
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_psychology"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_public_health.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: Public_Health
2 | tag: "jmmmu_culture_agnostic"
3 | task: "jmmmu_public_health"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/jmmmu/jmmmu_world_history.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: World_History
2 | tag: "jmmmu_culture_specific"
3 | task: "jmmmu_world_history"
4 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/librispeech/librispeech.yaml:
--------------------------------------------------------------------------------
1 | group: librispeech
2 | task:
3 |   - librispeech_dev_clean
4 |   - librispeech_dev_other
5 |   - librispeech_test_clean
6 |   - librispeech_test_other
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/librispeech/librispeech_long.yaml:
--------------------------------------------------------------------------------
1 | group: librispeech_long
2 | task:
3 |   - librispeech_test_clean_long
4 |   - librispeech_test_other_long
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/librispeech/librispeech_test_clean_long.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/Librispeech-concat
2 | task : "librispeech_test_clean_long"
3 | test_split: test_clean
4 | process_results: !function utils.librispeech_long_process_result
5 | include: _default_yaml_template


--------------------------------------------------------------------------------
/lmms_eval/tasks/librispeech/librispeech_test_other_long.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/Librispeech-concat
2 | task : "librispeech_test_other_long"
3 | test_split: test_other
4 | process_results: !function utils.librispeech_long_process_result
5 | include: _default_yaml_template


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench.yaml:
--------------------------------------------------------------------------------
 1 | group: live_bench
 2 | task:
 3 | - live_bench_2406
 4 | - live_bench_2407
 5 | - live_bench_2409
 6 | 
 7 | metadata:
 8 |   api_type: azure
 9 |   eval_with_mini: false
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2406.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2406"
2 | dataset_name: 2024-06
3 | include: live_bench_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2407.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2407"
2 | dataset_name: 2024-07
3 | include: live_bench_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/live_bench/live_bench_2409.yaml:
--------------------------------------------------------------------------------
1 | task: "live_bench_2409"
2 | dataset_name: 2024-09
3 | include: live_bench_template_yaml_v2
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa.yaml:
--------------------------------------------------------------------------------
 1 | group: livexiv_tqa
 2 | task:
 3 | - livexiv_tqa_v1
 4 | - livexiv_tqa_v2
 5 | - livexiv_tqa_v3
 6 | - livexiv_tqa_v4
 7 | - livexiv_tqa_v5
 8 | - livexiv_tqa_v6
 9 | 
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v1.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v1"
2 | dataset_name: "TQA-2024-09-21"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v2.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v2"
2 | dataset_name: "TQA-2024-10-26"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v3.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v3"
2 | dataset_name: "v3-TQA"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v4.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v4"
2 | dataset_name: "v4-TQA"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v5.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v5"
2 | dataset_name: "v5-TQA"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v6.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_tqa_v6"
2 | dataset_name: "v6-TQA"
3 | include: livexiv_tqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa.yaml:
--------------------------------------------------------------------------------
 1 | group: livexiv_vqa
 2 | task:
 3 | - livexiv_vqa_v1
 4 | - livexiv_vqa_v2
 5 | - livexiv_vqa_v3
 6 | - livexiv_vqa_v4
 7 | - livexiv_vqa_v5
 8 | - livexiv_vqa_v6
 9 | 
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v1.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v1"
2 | dataset_name: "VQA-2024-09-21"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v2.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v2"
2 | dataset_name: "VQA-2024-10-26"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v3.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v3"
2 | dataset_name: "v3-VQA"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v4.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v4"
2 | dataset_name: "v4-VQA"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v5.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v5"
2 | dataset_name: "v5-VQA"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v6.yaml:
--------------------------------------------------------------------------------
1 | task: "livexiv_vqa_v6"
2 | dataset_name: "v6-VQA"
3 | include: livexiv_vqa_template_yaml
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/llava_interleave_bench/_default_template_interleave_yaml:
--------------------------------------------------------------------------------
1 | output_type: generate_until
2 | generation_kwargs:
3 |   until:
4 |     - "ASSISTANT:"
5 |   image_aspect_ratio: pad
6 | metadata:
7 |   version: 0.0
8 |   api_type : openai
9 |   gpt_eval_model_name: "gpt-3.5-turbo"


--------------------------------------------------------------------------------
/lmms_eval/tasks/llava_interleave_bench/interleave_bench.yaml:
--------------------------------------------------------------------------------
1 | group: llava_interleave_bench
2 | task:
3 | - llava_interleave_bench_in_domain
4 | - llava_interleave_bench_out_domain
5 | - llava_interleave_bench_multi_view


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathverse/mathverse_testmini_text.yaml:
--------------------------------------------------------------------------------
 1 | group: mathverse_testmini_text
 2 | task:
 3 |   - mathverse_testmini_text_lite
 4 |   - mathverse_testmini_text_dominant
 5 |   - mathverse_testmini_text_only
 6 | metadata:
 7 |   version: 0.0
 8 |   gpt_eval_model_name: "gpt-3.5-turbo"
 9 |   trunk_response: 30
10 |   quick_match: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathverse/mathverse_testmini_vision.yaml:
--------------------------------------------------------------------------------
 1 | group: mathverse_testmini_vision
 2 | task:
 3 |   - mathverse_testmini_vision_intensive
 4 |   - mathverse_testmini_vision_dominant
 5 |   - mathverse_testmini_vision_only
 6 | metadata:
 7 |   version: 0.0
 8 |   gpt_eval_model_name: "gpt-3.5-turbo"
 9 |   trunk_response: 30
10 |   quick_match: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/mathvista.yaml:
--------------------------------------------------------------------------------
1 | group: mathvista
2 | task:
3 |   - mathvista_testmini
4 |   - mathvista_test
5 | metadata:
6 |   version: 0.0
7 |   gpt_eval_model_name: "gpt-3.5-turbo"
8 |   quick_extract: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mathvista/mathvista_testmini.yaml:
--------------------------------------------------------------------------------
1 | group: mathvista_testmini
2 | task:
3 |   - mathvista_testmini_cot
4 |   - mathvista_testmini_solution
5 |   - mathvista_testmini_format
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-3.5-turbo"
9 |   quick_extract: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/megabench.yaml:
--------------------------------------------------------------------------------
1 | group: megabench
2 | task:
3 | - megabench_core
4 | - megabench_open
5 | - megabench_core_si
6 | - megabench_open_si


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/megabench_core.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: core
2 | task: "megabench_core"
3 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/megabench_core_si.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: core_single_image
2 | task: "megabench_core_si"
3 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/megabench_open.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: open
2 | task: "megabench_open"
3 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/megabench_open_si.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: open_single_image
2 | task: "megabench_open_si"
3 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from metrics.aggregation_type import AggregationType
2 | from metrics.metric_type import MetricType
3 | from metrics.response_parse_type import ResponseParseType
4 | 
5 | __all__ = [AggregationType, MetricType, ResponseParseType]
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import Dict
3 | 
4 | 
5 | class UnsupportedAggregation:  # placeholder aggregator; its only method ignores its inputs
6 |     @staticmethod
7 |     def aggregate(scores: Dict[str, Number], weights: Dict[str, Number]) -> Number:
8 |         return -1  # constant result: neither `scores` nor `weights` is read
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py:
--------------------------------------------------------------------------------
1 | class DummyParse:
2 |     @staticmethod
3 |     def parse(response: str, *args, **kwargs) -> str:
4 |         """Return `response` with leading/trailing whitespace stripped; extra arguments are ignored."""
5 |         return response.strip()
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py:
--------------------------------------------------------------------------------
1 | class UnsupportedScoring:
2 |     """Fallback scorer whose `match` returns -1 for every input."""
3 | 
4 |     @staticmethod
5 |     def match(response: str, correct_answer: str) -> int:
6 |         """Return -1 regardless of the arguments (default for unimplemented metrics)."""
7 |         return -1
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/megabench/requirements.txt:
--------------------------------------------------------------------------------
 1 | filelock==3.16.1
 2 | geopy==2.4.1
 3 | jieba==0.42.1
 4 | nltk==3.9.1
 5 | numpy==1.26.4
 6 | pronouncing==0.2.0
 7 | rapidfuzz==3.9.5
 8 | regex==2024.7.24
 9 | Requests==2.32.3
10 | sacrebleu==2.4.3
11 | sympy==1.13.2
12 | tqdm==4.66.4
13 | Unidecode==1.3.8
14 | antlr4-python3-runtime==4.11.0
15 | requests==2.32.3
16 | requests_cache==1.2.1


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/audio2text/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_kwargs:
 2 |   token: true
 3 | dataset_path: lmms-lab/MixEval-X-audio2text
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     post_prompt: ""
 7 |     pre_prompt: ""
 8 | metadata:
 9 |   gpt_eval_model_name: gpt-4o-mini
10 |   version: 0


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_audio2text
2 | task:
3 | - mix_evals_audio2_text_freeform
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text_hard.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_audio2text_hard
2 | task:
3 | - mix_evals_audio2_text_freeform_hard
4 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_image2text
2 | task:
3 | - mix_evals_image2text_mc
4 | - mix_evals_image2text_freeform
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_hard.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_image2text_hard
2 | task:
3 | - mix_evals_image2text_mc_hard
4 | - mix_evals_image2text_freeform_hard
5 | # - mix_evals_image2text_openended


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_video2text
2 | task:
3 | - mix_evals_video2text_mc
4 | - mix_evals_video2text_freeform
5 | # - mix_evals_video2text_openended


--------------------------------------------------------------------------------
/lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_hard.yaml:
--------------------------------------------------------------------------------
1 | group: mix_evals_video2text_hard
2 | task:
3 | - mix_evals_video2text_mc_hard
4 | - mix_evals_video2text_freeform_hard
5 | # - mix_evals_video2text_openended


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmau/mmau.yaml:
--------------------------------------------------------------------------------
1 | group: mmau
2 | task:
3 |   - mmau_test_mini
4 |   - mmau_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmau/mmau_test.yaml:
--------------------------------------------------------------------------------
1 | task: "mmau_test"
2 | test_split: test
3 | 
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.mmau_aggregate_results_for_submission
7 |     higher_is_better: true
8 | 
9 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmau/mmau_test_mini.yaml:
--------------------------------------------------------------------------------
1 | task: "mmau_test_mini"
2 | test_split: test_mini
3 | 
4 | metric_list:
5 |   - metric: accuracy
6 |     aggregation: !function utils.mmau_aggregate_results
7 |     higher_is_better: true
8 | 
9 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench.yaml:
--------------------------------------------------------------------------------
 1 | group: mmbench
 2 | task:
 3 |   - mmbench_en_dev
 4 |   - mmbench_en_test
 5 |   - mmbench_cn_dev
 6 |   - mmbench_cn_test
 7 |   - mmbench_cn_cc
 8 |   - mmbench_ru_dev
 9 | metadata:
10 |   version: 0.0
11 |   sys_prompt: "There are several options:"
12 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_cn.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_cn
2 | task:
3 |   - mmbench_cn_dev
4 |   - mmbench_cn_test
5 |   - mmbench_cn_cc
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 |   sys_prompt: "有如下几个选项："


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml:
--------------------------------------------------------------------------------
1 | task: mmbench_cn_test
2 | test_split: test
3 | metric_list:
4 |   - metric: submission
5 |     aggregation: !function cn_utils.mmbench_aggregate_test_results
6 |     higher_is_better: true
7 | include: _default_template_mmbench_cn_yaml
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_en.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_en
2 | task:
3 |   - mmbench_en_dev
4 |   - mmbench_en_test
5 | metadata:
6 |   version: 0.0
7 |   sys_prompt: "There are several options:"
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmbench/mmbench_en_test.yaml:
--------------------------------------------------------------------------------
1 | task: "mmbench_en_test"
2 | test_split: test
3 | include: _default_template_mmbench_en_yaml
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function en_utils.mmbench_aggregate_test_results
7 |     higher_is_better: true
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_computer_science"
2 | "description": "The following are questions (with answers) about college\
3 |   \ computer science.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_computer_science"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_electrical_engineering"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "elementary_mathematics"
2 | "description": "The following are questions (with answers) about elementary\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_elementary_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_computer_science"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school computer science.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_computer_science"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_european_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school european history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_european_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_geography"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school geography.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_geography"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_macroeconomics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school macroeconomics.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_macroeconomics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_mathematics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school mathematics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_microeconomics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school microeconomics.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_microeconomics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_psychology"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school psychology.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_psychology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_statistics"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school statistics.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_statistics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_us_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school us history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_us_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_world_history"
2 | "description": "The following are questions (with answers) about high\
3 |   \ school world history.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_high_school_world_history"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_international_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_logical_fallacies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_continuation_stem"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_accounting"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ accounting.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_accounting"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_psychology"
2 | "description": "The following are questions (with answers) about professional\
3 |   \ psychology.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_professional_psychology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_public_relations"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_security_studies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_continuation_social_sciences"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_us_foreign_policy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_continuation_other"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_continuation_humanities"
5 | "include": "_continuation_template_yaml"
6 | "task": "mmlu_continuation_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu
 2 | task:
 3 |   - mmlu_stem
 4 |   - mmlu_other
 5 |   - mmlu_social_sciences
 6 |   - mmlu_humanities
 7 | aggregate_metric_list:
 8 |   - metric: acc
 9 |     weight_by_size: True
10 | metadata:
11 |   version: 2
12 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_humanities.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_humanities
 2 | group_alias: humanities
 3 | task:
 4 |   - mmlu_humanities_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_other.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_other
 2 | group_alias: other
 3 | task:
 4 |   - mmlu_other_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_social_sciences
 2 | group_alias: social sciences
 3 | task:
 4 |   - mmlu_social_sciences_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/_mmlu_stem.yaml:
--------------------------------------------------------------------------------
 1 | group: mmlu_stem
 2 | group_alias: stem
 3 | task:
 4 |   - mmlu_stem_tasks
 5 | aggregate_metric_list:
 6 |   - metric: acc
 7 |     weight_by_size: True
 8 | metadata:
 9 |   version: 2
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_abstract_algebra"
7 | "task_alias": "abstract_algebra"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_anatomy"
7 | "task_alias": "anatomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_astronomy"
7 | "task_alias": "astronomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_business_ethics"
7 | "task_alias": "business_ethics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_clinical_knowledge"
7 | "task_alias": "clinical_knowledge"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_biology"
7 | "task_alias": "college_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_chemistry"
7 | "task_alias": "college_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_computer_science"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ computer science.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_computer_science"
7 | "task_alias": "college_computer_science"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_mathematics"
7 | "task_alias": "college_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_medicine"
7 | "task_alias": "college_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_physics"
7 | "task_alias": "college_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_computer_security"
7 | "task_alias": "computer_security"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_conceptual_physics"
7 | "task_alias": "conceptual_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_econometrics"
7 | "task_alias": "econometrics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are multiple choice questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_electrical_engineering"
7 | "task_alias": "electrical_engineering"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "elementary_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about elementary\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_elementary_mathematics"
7 | "task_alias": "elementary_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_formal_logic"
7 | "task_alias": "formal_logic"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_global_facts"
7 | "task_alias": "global_facts"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_biology"
7 | "task_alias": "high_school_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_chemistry"
7 | "task_alias": "high_school_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_geography"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school geography.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_geography"
7 | "task_alias": "high_school_geography"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school mathematics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_mathematics"
7 | "task_alias": "high_school_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_physics"
7 | "task_alias": "high_school_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_psychology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school psychology.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_psychology"
7 | "task_alias": "high_school_psychology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_statistics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school statistics.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_statistics"
7 | "task_alias": "high_school_statistics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_us_history"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school us history.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_us_history"
7 | "task_alias": "high_school_us_history"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_aging"
7 | "task_alias": "human_aging"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_sexuality"
7 | "task_alias": "human_sexuality"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_international_law"
7 | "task_alias": "international_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_jurisprudence"
7 | "task_alias": "jurisprudence"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_logical_fallacies"
7 | "task_alias": "logical_fallacies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_stem_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_machine_learning"
7 | "task_alias": "machine_learning"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_management"
7 | "task_alias": "management"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_marketing"
7 | "task_alias": "marketing"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_medical_genetics"
7 | "task_alias": "medical_genetics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_miscellaneous"
7 | "task_alias": "miscellaneous"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_disputes"
7 | "task_alias": "moral_disputes"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_scenarios"
7 | "task_alias": "moral_scenarios"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_nutrition"
7 | "task_alias": "nutrition"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_philosophy"
7 | "task_alias": "philosophy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_prehistory"
7 | "task_alias": "prehistory"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_accounting"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ accounting.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_accounting"
7 | "task_alias": "professional_accounting"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_law"
7 | "task_alias": "professional_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_medicine"
7 | "task_alias": "professional_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_public_relations"
7 | "task_alias": "public_relations"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_security_studies"
7 | "task_alias": "security_studies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_sociology"
7 | "task_alias": "sociology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_social_sciences_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_us_foreign_policy"
7 | "task_alias": "us_foreign_policy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_other_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_virology"
7 | "task_alias": "virology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/default/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_humanities_tasks"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_world_religions"
7 | "task_alias": "world_religions"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_mathematics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "electrical_engineering"
2 | "description": "The following are multiple choice questions (with answers) about electrical\
3 |   \ engineering.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_electrical_engineering"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school chemistry.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_high_school_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_international_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_logical_fallacies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_stem"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_medicine"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_professional_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_public_relations"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_security_studies"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_social_sciences"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_us_foreign_policy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_cot_zeroshot_other"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_flan_cot_zeroshot_humanities"
5 | "include": "_mmlu_flan_cot_zeroshot_template_yaml"
6 | "task": "mmlu_flan_cot_zeroshot_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_abstract_algebra"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_clinical_knowledge"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_chemistry"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_medicine"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_computer_security"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_conceptual_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_human_sexuality"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_stem"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_machine_learning"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_medical_genetics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_moral_disputes"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_moral_scenarios"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_professional_law"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_social_sciences"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_generative_other"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_flan_n_shot_generative_humanities"
5 | "include": "_mmlu_flan_generative_template_yaml"
6 | "task": "mmlu_flan_n_shot_generative_world_religions"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_anatomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_astronomy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_business_ethics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_college_biology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_stem"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_college_physics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_econometrics"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_formal_logic"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_global_facts"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_human_aging"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_jurisprudence"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_management"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_marketing"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_miscellaneous"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_nutrition"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_philosophy"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_humanities"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_prehistory"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_social_sciences"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_sociology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_flan_n_shot_loglikelihood_other"
5 | "include": "_mmlu_flan_loglikelihood_template_yaml"
6 | "task": "mmlu_flan_n_shot_loglikelihood_virology"
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "abstract_algebra"
2 | "description": "The following are multiple choice questions (with answers) about abstract\
3 |   \ algebra.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_abstract_algebra_generative"
7 | "task_alias": "abstract_algebra"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_anatomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "anatomy"
2 | "description": "The following are multiple choice questions (with answers) about anatomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_anatomy_generative"
7 | "task_alias": "anatomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_astronomy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "astronomy"
2 | "description": "The following are multiple choice questions (with answers) about astronomy.\n\
3 |   \n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_astronomy_generative"
7 | "task_alias": "astronomy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "business_ethics"
2 | "description": "The following are multiple choice questions (with answers) about business\
3 |   \ ethics.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_business_ethics_generative"
7 | "task_alias": "business_ethics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "clinical_knowledge"
2 | "description": "The following are multiple choice questions (with answers) about clinical\
3 |   \ knowledge.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_clinical_knowledge_generative"
7 | "task_alias": "clinical_knowledge"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_biology"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ biology.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_biology_generative"
7 | "task_alias": "college_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_chemistry"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ chemistry.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_chemistry_generative"
7 | "task_alias": "college_chemistry"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_mathematics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ mathematics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_mathematics_generative"
7 | "task_alias": "college_mathematics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_medicine"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ medicine.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_medicine_generative"
7 | "task_alias": "college_medicine"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_college_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "college_physics"
2 | "description": "The following are multiple choice questions (with answers) about college\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_college_physics_generative"
7 | "task_alias": "college_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_computer_security.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "computer_security"
2 | "description": "The following are multiple choice questions (with answers) about computer\
3 |   \ security.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_computer_security_generative"
7 | "task_alias": "computer_security"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "conceptual_physics"
2 | "description": "The following are multiple choice questions (with answers) about conceptual\
3 |   \ physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_conceptual_physics_generative"
7 | "task_alias": "conceptual_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_econometrics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "econometrics"
2 | "description": "The following are multiple choice questions (with answers) about econometrics.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_econometrics_generative"
7 | "task_alias": "econometrics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "formal_logic"
2 | "description": "The following are multiple choice questions (with answers) about formal\
3 |   \ logic.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_formal_logic_generative"
7 | "task_alias": "formal_logic"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_global_facts.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "global_facts"
2 | "description": "The following are multiple choice questions (with answers) about global\
3 |   \ facts.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_global_facts_generative"
7 | "task_alias": "global_facts"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_biology"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school biology.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_biology_generative"
7 | "task_alias": "high_school_biology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "high_school_physics"
2 | "description": "The following are multiple choice questions (with answers) about high\
3 |   \ school physics.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_high_school_physics_generative"
7 | "task_alias": "high_school_physics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_human_aging.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_aging"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ aging.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_aging_generative"
7 | "task_alias": "human_aging"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "human_sexuality"
2 | "description": "The following are multiple choice questions (with answers) about human\
3 |   \ sexuality.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_human_sexuality_generative"
7 | "task_alias": "human_sexuality"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_international_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "international_law"
2 | "description": "The following are multiple choice questions (with answers) about international\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_international_law_generative"
7 | "task_alias": "international_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "jurisprudence"
2 | "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_jurisprudence_generative"
7 | "task_alias": "jurisprudence"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "logical_fallacies"
2 | "description": "The following are multiple choice questions (with answers) about logical\
3 |   \ fallacies.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_logical_fallacies_generative"
7 | "task_alias": "logical_fallacies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "machine_learning"
2 | "description": "The following are multiple choice questions (with answers) about machine\
3 |   \ learning.\n\n"
4 | "tag": "mmlu_stem_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_machine_learning_generative"
7 | "task_alias": "machine_learning"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_management.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "management"
2 | "description": "The following are multiple choice questions (with answers) about management.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_management_generative"
7 | "task_alias": "management"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_marketing.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "marketing"
2 | "description": "The following are multiple choice questions (with answers) about marketing.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_marketing_generative"
7 | "task_alias": "marketing"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "medical_genetics"
2 | "description": "The following are multiple choice questions (with answers) about medical\
3 |   \ genetics.\n\n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_medical_genetics_generative"
7 | "task_alias": "medical_genetics"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "miscellaneous"
2 | "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_miscellaneous_generative"
7 | "task_alias": "miscellaneous"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_disputes"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ disputes.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_disputes_generative"
7 | "task_alias": "moral_disputes"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "moral_scenarios"
2 | "description": "The following are multiple choice questions (with answers) about moral\
3 |   \ scenarios.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_moral_scenarios_generative"
7 | "task_alias": "moral_scenarios"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_nutrition.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "nutrition"
2 | "description": "The following are multiple choice questions (with answers) about nutrition.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_nutrition_generative"
7 | "task_alias": "nutrition"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_philosophy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "philosophy"
2 | "description": "The following are multiple choice questions (with answers) about philosophy.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_philosophy_generative"
7 | "task_alias": "philosophy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_prehistory.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "prehistory"
2 | "description": "The following are multiple choice questions (with answers) about prehistory.\n\
3 |   \n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_prehistory_generative"
7 | "task_alias": "prehistory"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_professional_law.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "professional_law"
2 | "description": "The following are multiple choice questions (with answers) about professional\
3 |   \ law.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_professional_law_generative"
7 | "task_alias": "professional_law"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_public_relations.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "public_relations"
2 | "description": "The following are multiple choice questions (with answers) about public\
3 |   \ relations.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_public_relations_generative"
7 | "task_alias": "public_relations"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_security_studies.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "security_studies"
2 | "description": "The following are multiple choice questions (with answers) about security\
3 |   \ studies.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_security_studies_generative"
7 | "task_alias": "security_studies"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_sociology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "sociology"
2 | "description": "The following are multiple choice questions (with answers) about sociology.\n\
3 |   \n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_sociology_generative"
7 | "task_alias": "sociology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "us_foreign_policy"
2 | "description": "The following are multiple choice questions (with answers) about us\
3 |   \ foreign policy.\n\n"
4 | "tag": "mmlu_social_sciences_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_us_foreign_policy_generative"
7 | "task_alias": "us_foreign_policy"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_virology.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "virology"
2 | "description": "The following are multiple choice questions (with answers) about virology.\n\
3 |   \n"
4 | "tag": "mmlu_other_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_virology_generative"
7 | "task_alias": "virology"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu/generative/mmlu_world_religions.yaml:
--------------------------------------------------------------------------------
1 | "dataset_name": "world_religions"
2 | "description": "The following are multiple choice questions (with answers) about world\
3 |   \ religions.\n\n"
4 | "tag": "mmlu_humanities_generative"
5 | "include": "_default_template_yaml"
6 | "task": "mmlu_world_religions_generative"
7 | "task_alias": "world_religions"
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_health.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about health. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_health"
4 | task_alias: "health"
5 | process_docs: !function utils.process_health
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_law.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about law. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_law"
4 | task_alias: "law"
5 | process_docs: !function utils.process_law
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_math.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about math. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_math"
4 | task_alias: "math"
5 | process_docs: !function utils.process_math
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmlu_pro/mmlu_pro_other.yaml:
--------------------------------------------------------------------------------
1 | description: "The following are multiple choice questions (with answers) about other. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
2 | include: "_default_template_yaml"
3 | task: "mmlu_pro_other"
4 | task_alias: "other"
5 | process_docs: !function utils.process_other
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/_default_template_yaml:
--------------------------------------------------------------------------------
1 | generation_kwargs:
2 |   max_new_tokens: 16
3 | 
4 | metadata:
5 |   version: 0.0
6 |   interleaved_format: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/tasks/mmmu/arial.ttf


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/mmmu.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu
2 | task:
3 | - mmmu_val
4 | - mmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu/mmmu_group_img.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_group_img
2 | task:
3 | - mmmu_val_group_img
4 | - mmmu_test_group_img
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/_default_template_yaml:
--------------------------------------------------------------------------------
1 | generation_kwargs:
2 |   max_new_tokens: 256
3 | 
4 | metadata:
5 |   version: 0.0
6 |   interleaved_format: false


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_pro
2 | task:
3 | - mmmu_pro_vision
4 | # - mmmu_pro_composite  # the composite task is excluded from the formal MMMU-Pro evaluation
5 | - mmmu_pro_standard
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmmu_pro/mmmu_pro_cot.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_pro_cot
2 | task:
3 | - mmmu_pro_vision_cot
4 | - mmmu_pro_composite_cot
5 | - mmmu_pro_original_cot
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py:
--------------------------------------------------------------------------------
1 | # Implement your code here.
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/_default_template_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     pre_prompt: ""
4 |     post_prompt: "\nAnswer the question using a single character from the given options."
5 | generation_kwargs:
6 |   max_new_tokens: 8
7 | metadata:
8 |   version: 0.0
9 |   task_type: image


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/mmt.yaml:
--------------------------------------------------------------------------------
1 | group: mmt
2 | task:
3 | - mmt_val
4 | - mmt_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmt/mmt_mi.yaml:
--------------------------------------------------------------------------------
1 | group: mmt_mi
2 | task:
3 | - mmt_mi_val
4 | - mmt_mi_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmaad_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmaad_base"
 2 | test_split: test
 3 | dataset_name: mmaad_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmaad_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmiasd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmiasd_base"
 2 | test_split: test
 3 | dataset_name: mmiasd_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmiasd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmivqd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmivqd_base"
 2 | test_split: test
 3 | dataset_name: mmivqd_base
 4 | lmms_eval_specific_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmivqd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd
 2 | task:
 3 |   - mmaad_base
 4 |   - mmaad_option
 5 |   - mmaad_instruction
 6 |   - mmiasd_base
 7 |   - mmiasd_option
 8 |   - mmiasd_instruction
 9 |   - mmivqd_base
10 |   - mmivqd_option
11 |   - mmivqd_instruction
12 | metadata:
13 |   version: 0.0
14 |   sys_prompt: ""
15 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_base.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd_base
 2 | task:
 3 |   - mmaad_base
 4 |   - mmiasd_base
 5 |   - mmivqd_base
 6 | metadata:
 7 |   version: 0.0
 8 |   sys_prompt: ""
 9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"
10 |   


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_instruction.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_instruction
2 | task:
3 |   - mmaad_instruction
4 |   - mmiasd_instruction
5 |   - mmivqd_instruction
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mmupd/mmupd_option.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_option
2 | task:
3 |   - mmaad_option
4 |   - mmiasd_option
5 |   - mmivqd_option
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms_eval/tasks/multidocvqa/multidocvqa.yaml:
--------------------------------------------------------------------------------
1 | group: multidocvqa
2 | task:
3 | - multidocvqa_val
4 | - multidocvqa_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: arabic
4 |     token: True
5 | task: "llava_in_the_wild_arabic"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: bengali
4 |     token: True
5 | task: "llava_in_the_wild_bengali"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: chinese
4 |     token: True
5 | task: "llava_in_the_wild_chinese"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: french
4 |     token: True
5 | task: "llava_in_the_wild_french"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: hindi
4 |     token: True
5 | task: "llava_in_the_wild_hindi"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: japanese
4 |     token: True
5 | task: "llava_in_the_wild_japanese"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: russian
4 |     token: True
5 | task: "llava_in_the_wild_russian"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |   config: spanish
4 |   token: True
5 | task: "llava_in_the_wild_spanish"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: urdu
4 |     token: True
5 | task: "llava_in_the_wild_urdu"
6 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_antonym
3 | dataset_name: action_antonym
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_antonym
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_count
3 | dataset_name: action_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_count
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_localization
3 | dataset_name: action_localization
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_localization
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_prediction
3 | dataset_name: action_prediction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_prediction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_action_sequence
3 | dataset_name: action_sequence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_sequence
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_character_order.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_character_order
3 | dataset_name: character_order
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: character_order
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_counterfactual_inference
3 | dataset_name: counterfactual_inference
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: counterfactual_inference
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_egocentric_navigation
3 | dataset_name: egocentric_navigation
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: egocentric_navigation
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_fine_grained_action
3 | dataset_name: fine_grained_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_action
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_fine_grained_pose
3 | dataset_name: fine_grained_pose
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_pose
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_attribute
3 | dataset_name: moving_attribute
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_attribute
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_count
3 | dataset_name: moving_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_count
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_moving_direction
3 | dataset_name: moving_direction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_direction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_existence
3 | dataset_name: object_existence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_existence
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_interaction
3 | dataset_name: object_interaction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_interaction
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_object_shuffle
3 | dataset_name: object_shuffle
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_shuffle
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_scene_transition
3 | dataset_name: scene_transition
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: scene_transition
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_state_change.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_state_change
3 | dataset_name: state_change
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: state_change
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml
2 | task: mvbench_unexpected_action
3 | dataset_name: unexpected_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: unexpected_action
8 |     post_prompt: "Only give the best option.\n"


--------------------------------------------------------------------------------
/lmms_eval/tasks/nextqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/NExTQA
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: nextqa
6 | metadata:
7 |   version: 0.0.1
8 |   load_package: True
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/nextqa/nextqa.yaml:
--------------------------------------------------------------------------------
1 | group: nextqa
2 | task:
3 | - nextqa_oe_test
4 | - nextqa_oe_val
5 | - nextqa_mc_test
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:
2 |   default:
3 |     prompt: "Provide a one-sentence caption for the provided image."
4 |   plm:
5 |     prompt: "Describe the image briefly."


--------------------------------------------------------------------------------
/lmms_eval/tasks/nocaps/nocaps.yaml:
--------------------------------------------------------------------------------
1 | group: nocaps
2 | task:
3 |   - nocaps_test
4 |   - nocaps_val


--------------------------------------------------------------------------------
/lmms_eval/tasks/ocrbench_v2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/tasks/ocrbench_v2/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py


--------------------------------------------------------------------------------
/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: ok_vqa
2 | task:
3 | - ok_vqa_val2014


--------------------------------------------------------------------------------
/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml:
--------------------------------------------------------------------------------
1 | task: ok_vqa_val2014
2 | test_split: val2014
3 | include: _default_template_vqa_yaml


--------------------------------------------------------------------------------
/lmms_eval/tasks/olympiadbench/olympiadbench.yaml:
--------------------------------------------------------------------------------
1 | group: olympiadbench
2 | task:
3 | - olympiadbench_test_en
4 | - olympiadbench_test_cn
5 | metadata:
6 |   version: 0.0
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr.yaml:
--------------------------------------------------------------------------------
 1 | group: openasr
 2 | task:
 3 | - open_asr_ami
 4 | - open_asr_common_voice
 5 | - open_asr_earnings22
 6 | - open_asr_gigaspeech
 7 | - open_asr_librispeech_test_clean
 8 | - open_asr_librispeech_test_other
 9 | - open_asr_spgispeech
10 | - open_asr_voxpopuli
11 | - open_asr_tedlium


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_ami.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: ami 
2 | test_split: test
3 | include: _default_template_yaml
4 | task: open_asr_ami


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_common_voice.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: common_voice 
2 | test_split: test
3 | include: _default_template_yaml
4 | task: open_asr_common_voice


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_earnings22.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: earnings22 
2 | test_split: test
3 | include: _default_template_yaml
4 | task: open_asr_earnings22


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_gigaspeech.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: gigaspeech 
2 | test_split: test
3 | include: _default_template_yaml
4 | task: open_asr_gigaspeech


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_librispeech.yaml:
--------------------------------------------------------------------------------
1 | group: openasr_librispeech 
2 | task:
3 | - open_asr_librispeech_test_other
4 | - open_asr_librispeech_test_clean


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_librispeech_test_clean.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: librispeech
2 | test_split: test.clean
3 | include: _default_template_yaml
4 | task: open_asr_librispeech_test_clean


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_librispeech_test_other.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: librispeech  # same dataset_name as the test_clean variant; only the split differs
2 | test_split: test.other  # split evaluated by this task
3 | include: _default_template_yaml  # all other settings come from this shared template (not shown here)
4 | task: open_asr_librispeech_test_other


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_spgispeech.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: spgispeech # presumably the subset of the dataset named in the included template — confirm
2 | test_split: test  # split evaluated by this task
3 | include: _default_template_yaml  # all other settings come from this shared template (not shown here)
4 | task: open_asr_spgispeech


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_tedlium.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: tedlium  # presumably the subset of the dataset named in the included template — confirm
2 | test_split: test  # split evaluated by this task
3 | include: _default_template_yaml  # all other settings come from this shared template (not shown here)
4 | task: open_asr_tedlium


--------------------------------------------------------------------------------
/lmms_eval/tasks/open_asr/openasr_voxpopuli.yaml:
--------------------------------------------------------------------------------
1 | dataset_name: voxpopuli # presumably the subset of the dataset named in the included template — confirm
2 | test_split: test  # split evaluated by this task
3 | include: _default_template_yaml  # all other settings come from this shared template (not shown here)
4 | task: open_asr_voxpopuli


--------------------------------------------------------------------------------
/lmms_eval/tasks/perceptiontest/test/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/PerceptionTest  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest  # cache directory name for this dataset
6 | lmms_eval_specific_kwargs:
7 |   default:  # values used when no more specific entry applies (only "default" is defined here)
8 |     pre_prompt: ""  # empty string: no prefix text configured
9 |     post_prompt: ""  # empty string: no suffix text configured


--------------------------------------------------------------------------------
/lmms_eval/tasks/perceptiontest/val/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/PerceptionTest_Val  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest_val  # cache directory name; distinct from the test template's "perceptiontest"
6 | lmms_eval_specific_kwargs:
7 |   default:  # values used when no more specific entry applies (only "default" is defined here)
8 |     pre_prompt: ""  # empty string: no prefix text configured
9 |     post_prompt: ""  # empty string: no suffix text configured


--------------------------------------------------------------------------------
/lmms_eval/tasks/pope/pope_full.yaml:
--------------------------------------------------------------------------------
1 | group : pope_full  # group name; running it selects the three POPE tasks listed below
2 | task:
3 |   - pope_adv
4 |   - pope_pop
5 |   - pope_random


--------------------------------------------------------------------------------
/lmms_eval/tasks/qbench/qbenchs_dev.yaml:
--------------------------------------------------------------------------------
1 | group: qbenchs_dev  # group name bundling the three *_dev tasks listed below
2 | task:
3 | - qbench_dev
4 | - qbench2_dev
5 | - abench_dev
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+  # top-level group: segmentation and bbox variants over val/testA/testB
2 | task:  # NOTE(review): the refcoco+_bbox_rec_* tasks defined in sibling files are not listed here — confirm that is intended
3 | - refcoco+_seg_val
4 | - refcoco+_seg_testA
5 | - refcoco+_seg_testB
6 | - refcoco+_bbox_val
7 | - refcoco+_bbox_testA
8 | - refcoco+_bbox_testB
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec  # group shared by the refcoco+ bbox_rec split variants
2 | task: refcoco+_bbox_rec_testA
3 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: testA  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec  # group shared by the refcoco+ bbox_rec split variants
2 | task: refcoco+_bbox_rec_testB
3 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: testB  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox_rec  # group shared by the refcoco+ bbox_rec split variants
2 | task: refcoco+_bbox_rec_val
3 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox  # group shared by the refcoco+ bbox split variants
2 | task: refcoco+_bbox_testA
3 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
4 | test_split: testA  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox  # group shared by the refcoco+ bbox split variants
2 | task: refcoco+_bbox_testB
3 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
4 | test_split: testB  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox  # group shared by the refcoco+ bbox split variants
2 | task: refcoco+_bbox_val
3 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg  # group shared by the refcoco+ seg split variants
2 | task: refcoco+_seg_testA
3 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
4 | test_split: testA  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg  # group shared by the refcoco+ seg split variants
2 | task: refcoco+_seg_testB
3 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
4 | test_split: testB  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg  # group shared by the refcoco+ seg split variants
2 | task: refcoco+_seg_val
3 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/_refcoco.yaml:
--------------------------------------------------------------------------------
 1 | group: refcoco  # top-level group: segmentation and bbox variants over test/val/testA/testB
 2 | task:  # NOTE(review): the refcoco_bbox_rec_* tasks defined in sibling files are not listed here — confirm that is intended
 3 | - refcoco_seg_test
 4 | - refcoco_seg_val
 5 | - refcoco_seg_testA
 6 | - refcoco_seg_testB
 7 | - refcoco_bbox_test
 8 | - refcoco_bbox_val
 9 | - refcoco_bbox_testA
10 | - refcoco_bbox_testB
11 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec  # group shared by the refcoco bbox_rec split variants
2 | task: refcoco_bbox_rec_test
3 | test_split: test  # split evaluated by this variant
4 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec  # group shared by the refcoco bbox_rec split variants
2 | task: refcoco_bbox_rec_testA
3 | test_split: testA  # split evaluated by this variant
4 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec  # group shared by the refcoco bbox_rec split variants
2 | task: refcoco_bbox_rec_testB
3 | test_split: testB  # split evaluated by this variant
4 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox_rec  # group shared by the refcoco bbox_rec split variants
2 | task: refcoco_bbox_rec_val
3 | test_split: val  # split evaluated by this variant
4 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox  # group shared by the refcoco bbox split variants
2 | task: refcoco_bbox_test
3 | test_split: test  # split evaluated by this variant
4 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox  # group shared by the refcoco bbox split variants
2 | task: refcoco_bbox_testA
3 | test_split: testA  # split evaluated by this variant
4 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox  # group shared by the refcoco bbox split variants
2 | task: refcoco_bbox_testB
3 | test_split: testB  # split evaluated by this variant
4 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox  # group shared by the refcoco bbox split variants
2 | task: refcoco_bbox_val
3 | test_split: val  # split evaluated by this variant
4 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg  # group shared by the refcoco seg split variants
2 | task: refcoco_seg_test
3 | test_split: test  # split evaluated by this variant
4 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg  # group shared by the refcoco seg split variants
2 | task: refcoco_seg_testA
3 | test_split: testA  # split evaluated by this variant
4 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg  # group shared by the refcoco seg split variants
2 | task: refcoco_seg_testB
3 | test_split: testB  # split evaluated by this variant
4 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg  # group shared by the refcoco seg split variants
2 | task: refcoco_seg_val
3 | test_split: val  # split evaluated by this variant
4 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog  # top-level group: segmentation and bbox variants over test/val
2 | task:  # NOTE(review): the refcocog_bbox_rec_* tasks defined in sibling files are not listed here — confirm that is intended
3 | - refcocog_seg_test
4 | - refcocog_seg_val
5 | - refcocog_bbox_test
6 | - refcocog_bbox_val
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox_rec  # group shared by the refcocog bbox_rec split variants
2 | task: refcocog_bbox_rec_test
3 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: test  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_rec_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox_rec  # group shared by the refcocog bbox_rec split variants
2 | task: refcocog_bbox_rec_val
3 | include: _default_template_bbox_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox  # group shared by the refcocog bbox split variants
2 | task: refcocog_bbox_test
3 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
4 | test_split: test  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox  # group shared by the refcocog bbox split variants
2 | task: refcocog_bbox_val
3 | include: _default_template_bbox_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg  # group shared by the refcocog seg split variants
2 | task: refcocog_seg_test
3 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
4 | test_split: test  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg  # group shared by the refcocog seg split variants
2 | task: refcocog_seg_val
3 | include: _default_template_seg_yaml  # common settings come from this template (not shown here)
4 | test_split: val  # split evaluated by this variant
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/scienceqa/scienceqa_full.yaml:
--------------------------------------------------------------------------------
1 | group: scienceqa_full  # group name bundling the two ScienceQA tasks listed below
2 | task:
3 |   - scienceqa
4 |   - scienceqa_img


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/_screenspot.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot  # top-level group bundling the reg and rec test tasks listed below
2 | task:
3 | - screenspot_reg_test
4 | - screenspot_rec_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/screenspot_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_rec  # sub-group for the "rec" variant
2 | task: screenspot_rec_test
3 | include: _default_template_rec_yaml  # common settings come from this template (not shown here)
4 | test_split: test  # split evaluated by this task
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/screenspot/screenspot_reg_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_reg  # sub-group for the "reg" variant
2 | task: screenspot_reg_test
3 | include: _default_template_reg_yaml  # common settings come from this template (not shown here)
4 | test_split: test  # split evaluated by this task
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/synthdog/synthdog.yaml:
--------------------------------------------------------------------------------
1 | group: synthdog  # group name bundling the two language variants listed below
2 | task:
3 | - synthdog_en
4 | - synthdog_zh


--------------------------------------------------------------------------------
/lmms_eval/tasks/tempcompass/_tempcompass.yaml:
--------------------------------------------------------------------------------
1 | group: tempcompass  # group name bundling the four TempCompass task formats listed below
2 | task:
3 | - tempcompass_multi_choice
4 | - tempcompass_yes_no
5 | - tempcompass_caption_matching
6 | - tempcompass_captioning
7 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/temporalbench/temporalbench.yaml:
--------------------------------------------------------------------------------
1 | group: temporalbench  # group name bundling the three TemporalBench tasks listed below
2 | task:
3 | - temporalbench_short_qa
4 | - temporalbench_long_qa
5 | - temporalbench_short_caption
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml:
--------------------------------------------------------------------------------
1 | lmms_eval_specific_kwargs:  # template fragment; only the default prompt is defined in this file
2 |   default:
3 |     prompt: Provide a one-sentence caption for the provided image.  # instruction text; how it is combined with the image is decided by code not shown here


--------------------------------------------------------------------------------
/lmms_eval/tasks/textcaps/textcaps.yaml:
--------------------------------------------------------------------------------
1 | group : textcaps  # group name bundling the val and test tasks listed below
2 | task:
3 |   - textcaps_val
4 |   - textcaps_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/textvqa/_textvqa.yaml:
--------------------------------------------------------------------------------
1 | group: textvqa  # group name bundling the val and test tasks listed below
2 | task:
3 | - textvqa_val
4 | - textvqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/textvqa/textvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: textvqa_test  # test-split variant of TextVQA
2 | test_split: test
3 | metric_list:  # a single metric is declared for this task
4 |   - metric: submission  # metric name; aggregated by the function referenced on the next line
5 |     aggregation: !function utils.textvqa_aggregate_submissions
6 |     higher_is_better: true
7 | include: _default_template_textvqa_yaml  # remaining settings come from this shared template (not shown here)
8 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vatex/_vatex.yaml:
--------------------------------------------------------------------------------
1 | group : vatex  # group name bundling the two VATEX tasks listed below
2 | task:
3 | - vatex_val_zh
4 | - vatex_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/vdc/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: wchai/lmms_VDC_test  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
 2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: vdc_test  # cache directory name for this dataset
 6 | 
 7 | metadata:
 8 |   version: 0.0
 9 |   gpt_eval_model_name: gpt-4o-mini  # model name stored in metadata; presumably read by the task's GPT-based scoring code — confirm
10 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/video_detail_description/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoDetailDescription  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
 2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt  # NOTE(review): same cache_dir value as the videochatgpt template — confirm the shared cache is intended
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""  # empty string: no prefix text configured
 9 |     post_prompt: ""  # empty string: no suffix text configured
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0613  # dated model snapshot; NOTE(review): confirm it is still served by the provider


--------------------------------------------------------------------------------
/lmms_eval/tasks/videochatgpt/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoChatGPT  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
 2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt  # cache directory name for this dataset
 6 | lmms_eval_specific_kwargs:
 7 |   default:
 8 |     pre_prompt: ""  # empty string: no prefix text configured
 9 |     post_prompt: ""  # empty string: no suffix text configured
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0613  # dated model snapshot; NOTE(review): confirm it is still served by the provider


--------------------------------------------------------------------------------
/lmms_eval/tasks/videochatgpt/_videochatgpt.yaml:
--------------------------------------------------------------------------------
1 | group: videochatgpt  # group name bundling the three VideoChatGPT tasks listed below
2 | task:
3 |   - videochatgpt_gen
4 |   - videochatgpt_temporal
5 |   - videochatgpt_consistency
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/videommmu/video_mmmu.yaml:
--------------------------------------------------------------------------------
1 | group: video_mmmu  # group name bundling the three Video-MMMU tracks listed below
2 | task:
3 | - video_mmmu_adaptation
4 | - video_mmmu_comprehension
5 | - video_mmmu_perception
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vitatecs/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lscpku/VITATECS  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
3 |   token: True
4 |   video: True
5 |   cache_dir: vitatecs  # cache directory name for this dataset
6 | lmms_eval_specific_kwargs:
7 |   default:
8 |     pre_prompt: ""  # empty string: no prefix text configured
9 |     post_prompt: "\nPlease response with a single letter (A or B):"  # NOTE(review): "response" reads like a typo for "respond"; left unchanged because this string is part of the prompt and editing it could shift results


--------------------------------------------------------------------------------
/lmms_eval/tasks/vitatecs/_vitatecs.yaml:
--------------------------------------------------------------------------------
1 | group: vitatecs  # group name bundling the six VITATECS aspect tasks listed below
2 | task:
3 | - vitatecs_direction
4 | - vitatecs_intensity
5 | - vitatecs_sequence
6 | - vitatecs_compositionality
7 | - vitatecs_localization
8 | - vitatecs_type
9 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: vizwiz_vqa  # group name bundling the val and test tasks listed below
2 | task:
3 | - vizwiz_vqa_val
4 | - vizwiz_vqa_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/vlmsareblind/__init__.py:
--------------------------------------------------------------------------------
1 | # VLMs Are Blind benchmark task
2 | # Tests visual reasoning capabilities through path-counting in subway connection diagrams
3 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vqav2/_vqav2.yaml:
--------------------------------------------------------------------------------
1 | group: vqav2  # group name bundling the val and test tasks listed below
2 | task:
3 | - vqav2_val
4 | - vqav2_test


--------------------------------------------------------------------------------
/lmms_eval/tasks/vstar_bench/__init__.py:
--------------------------------------------------------------------------------
1 | # V* Benchmark: Guided Visual Search as a Core Mechanism in Multimodal LLMs
2 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/vstar_bench/vstar_bench.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml  # NOTE(review): this file has no `task:` key; presumably the included template supplies it — confirm
2 | metric_list:
3 |   - metric: vstar_overall_acc  # overall accuracy metric; the per-category variants use their own metric names
4 |     aggregation: !function utils.vstar_aggregate_results
5 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/vstar_bench/vstar_bench_direct_attributes.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml  # shared settings come from this template (not shown here)
2 | task: "vstar_bench_direct_attributes"
3 | dataset_kwargs:  # NOTE(review): may replace rather than extend any dataset_kwargs in the template — confirm merge behavior
4 |   category: "direct_attributes"  # presumably restricts evaluation to this category; the consuming code is not shown here
5 | metric_list:
6 |   - metric: vstar_direct_attributes_acc  # category-specific metric name
7 |     aggregation: !function utils.vstar_aggregate_results
8 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/vstar_bench/vstar_bench_relative_position.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template_yaml  # shared settings come from this template (not shown here)
2 | task: "vstar_bench_relative_position"
3 | dataset_kwargs:  # NOTE(review): may replace rather than extend any dataset_kwargs in the template — confirm merge behavior
4 |   category: "relative_position"  # presumably restricts evaluation to this category; the consuming code is not shown here
5 | metric_list:
6 |   - metric: vstar_relative_position_acc  # category-specific metric name
7 |     aggregation: !function utils.vstar_aggregate_results
8 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms_eval/tasks/websrc/websrc.yaml:
--------------------------------------------------------------------------------
1 | group: websrc  # group name bundling the val and test tasks listed below
2 | task:
3 | - websrc_val
4 | - websrc_test
5 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/wild_vision_bench/wildvision_bench.yaml:
--------------------------------------------------------------------------------
1 | group: wildvision  # group name bundling the two task variants listed below
2 | task: # the numeric suffixes look like release dates (0617, 0630) — TODO confirm
3 |   - wildvision_0617
4 |   - wildvision_0630


--------------------------------------------------------------------------------
/lmms_eval/tasks/worldqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/worldqa  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
3 |   token: True
4 |   video: True
5 |   cache_dir: multi-hop-reasoning # NOTE(review): cache directory name does not mention "worldqa" — confirm it is the intended location
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-4-0613"  # dated model snapshot; NOTE(review): confirm it is still served by the provider


--------------------------------------------------------------------------------
/lmms_eval/tasks/worldqa/worldqa.yaml:
--------------------------------------------------------------------------------
1 | group: worldqa  # group name bundling the two WorldQA tasks listed below
2 | task:
3 | - worldqa_gen
4 | - worldqa_mc
5 | 
6 | 


--------------------------------------------------------------------------------
/lmms_eval/tasks/youcook2/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/YouCook2  # dataset identifier (presumably a Hugging Face Hub dataset id — confirm)
2 | dataset_kwargs:  # options handed to the dataset loading step; how each is consumed is not shown here
3 |   token: True
4 |   video: True
5 |   cache_dir: YouCookIIVideos  # cache directory name for this dataset
6 | 


--------------------------------------------------------------------------------
/miscs/example_eval.yaml:
--------------------------------------------------------------------------------
1 | - model: llava  # first (and only) entry of a YAML list; each entry describes one evaluation run
2 |   model_args: pretrained=liuhaotian/llava-v1.5-7b  # key=value string with the checkpoint to load
3 |   tasks: mmmu_val  # task name to evaluate
4 |   batch_size: 1
5 |   log_samples: true  # enables sample logging; the suffix below is used for the logged files (presumably — confirm)
6 |   log_samples_suffix: eval_mmmu
7 |   output_path: "./logs/"  # relative output directory
8 | 
9 | 


--------------------------------------------------------------------------------
/miscs/llava_result_check.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/miscs/llava_result_check.md


--------------------------------------------------------------------------------
/miscs/llava_sglang_result_check.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/miscs/llava_sglang_result_check.md


--------------------------------------------------------------------------------
/miscs/test_scienceqa.py:
--------------------------------------------------------------------------------
1 | from datasets import load_dataset
2 | 
3 | dataset = load_dataset("Otter-AI/ScienceQA", trust_remote_code=True)["test"]  # load the dataset and keep only its "test" split; trust_remote_code=True permits the dataset repo's own loading code to run
4 | for doc in dataset:  # iterate over every record of the test split
5 |     print(doc["id"])  # print the record's "id" field, one per line
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | from setuptools import setup
3 | 
4 | # This is to make sure that the package supports editable installs
5 | if __name__ == "__main__":
6 |     setuptools.setup(
7 |         license_files=["LICENSE"],
8 |     )
9 | 


--------------------------------------------------------------------------------
/tools/lite/embedder/__init__.py:
--------------------------------------------------------------------------------
1 | from .BaseEmbedder import BaseEmbedder
2 | from .ClipBgeEmbedder import ClipBgeEmbedder
3 | 


--------------------------------------------------------------------------------
/tools/lite/shrinker/__init__.py:
--------------------------------------------------------------------------------
1 | from .BaseShrinker import BaseShrinker
2 | from .EmbedShrinker import Embed_Shrinker
3 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/__init__.py:
--------------------------------------------------------------------------------
1 | from .api.live_bench import generate_live_bench, generate_live_bench_from_path
2 | from .data_generator import LiveBench
3 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/data_generator/__init__.py:
--------------------------------------------------------------------------------
1 | from live_bench.data_generator.live_bench import LiveBench
2 | from live_bench.data_generator.live_bench_data import LiveBenchData
3 | from live_bench.data_generator.qa_generator import get_generator, get_random_generator
4 | from live_bench.data_generator.response import Response
5 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/data_generator/example/example_website.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/tools/live_bench/live_bench/data_generator/example/example_website.png


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/data_generator/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EvolvingLMMs-Lab/lmms-eval/d4383329aeaa6ffbcde94a9b31ca0eff7fee557c/tools/live_bench/live_bench/data_generator/utils/__init__.py


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/driver/.gitignore:
--------------------------------------------------------------------------------
1 | extensions/
2 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/driver/__init__.py:
--------------------------------------------------------------------------------
1 | from live_bench.driver.load_driver import load_driver
2 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/screen_shoter/__init__.py:
--------------------------------------------------------------------------------
1 | from live_bench.screen_shoter.screen import ScreenImage
2 | from live_bench.screen_shoter.screen_shoter import (
3 |     ScreenShoter,
4 |     get_shoter,
5 |     register_shoter,
6 | )
7 | 


--------------------------------------------------------------------------------
/tools/live_bench/live_bench/websites/__init__.py:
--------------------------------------------------------------------------------
1 | from live_bench.websites.load_website import load_websites, load_websites_from_file
2 | from live_bench.websites.website import Website
3 | 


--------------------------------------------------------------------------------
/tools/live_bench/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | setuptools.setup()  # called with no arguments; package metadata is presumably declared in a config file not shown here — confirm
4 | 


--------------------------------------------------------------------------------