├── .gitignore
├── DATA.md
├── LICENSE
├── README.md
├── inference.py
├── inference.sh
├── inference_34b.sh
├── inference_image.py
├── lmms-eval
    ├── lmms_eval
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── api
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── filter.cpython-310.pyc
    │   │   │   ├── instance.cpython-310.pyc
    │   │   │   ├── metrics.cpython-310.pyc
    │   │   │   ├── model.cpython-310.pyc
    │   │   │   ├── registry.cpython-310.pyc
    │   │   │   ├── samplers.cpython-310.pyc
    │   │   │   └── task.cpython-310.pyc
    │   │   ├── filter.py
    │   │   ├── instance.py
    │   │   ├── metrics.py
    │   │   ├── model.py
    │   │   ├── registry.py
    │   │   ├── samplers.py
    │   │   └── task.py
    │   ├── evaluator.py
    │   ├── filters
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── extraction.cpython-310.pyc
    │   │   │   ├── selection.cpython-310.pyc
    │   │   │   └── transformation.cpython-310.pyc
    │   │   ├── decontamination.py
    │   │   ├── extraction.py
    │   │   ├── selection.py
    │   │   └── transformation.py
    │   ├── logging_utils.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   ├── from_log.cpython-310.pyc
    │   │   │   ├── oryx.cpython-310.pyc
    │   │   │   └── oryx_image.cpython-310.pyc
    │   │   ├── from_log.py
    │   │   ├── model_utils
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── __init__.cpython-310.pyc
    │   │   │   │   └── load_video.cpython-310.pyc
    │   │   │   ├── load_video.py
    │   │   │   └── qwen
    │   │   │   │   └── qwen_generate_utils.py
    │   │   ├── mplug_owl_video
    │   │   │   ├── __init__.py
    │   │   │   ├── configuration_mplug_owl.py
    │   │   │   ├── modeling_mplug_owl.py
    │   │   │   ├── processing_mplug_owl.py
    │   │   │   └── tokenization_mplug_owl.py
    │   │   ├── oryx.py
    │   │   ├── oryx_image.py
    │   │   └── video_chatgpt
    │   │   │   ├── __init__.py
    │   │   │   ├── constants.py
    │   │   │   ├── eval
    │   │   │       ├── __init__.py
    │   │   │       └── model_utils.py
    │   │   │   ├── inference.py
    │   │   │   ├── model
    │   │   │       ├── __init__.py
    │   │   │       ├── consolidate.py
    │   │   │       ├── make_delta.py
    │   │   │       ├── utils.py
    │   │   │       └── video_chatgpt.py
    │   │   │   ├── single_video_inference.py
    │   │   │   ├── utils.py
    │   │   │   └── video_conversation.py
    │   ├── tasks
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-310.pyc
    │   │   ├── _task_utils
    │   │   │   ├── __pycache__
    │   │   │   │   ├── file_utils.cpython-310.pyc
    │   │   │   │   ├── video_loader.cpython-310.pyc
    │   │   │   │   └── vqa_eval_metric.cpython-310.pyc
    │   │   │   ├── file_utils.py
    │   │   │   ├── gpt_eval_utils.py
    │   │   │   ├── video_loader.py
    │   │   │   └── vqa_eval_metric.py
    │   │   ├── activitynetqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── activitynetqa_generation.yaml
    │   │   │   └── utils.py
    │   │   ├── ai2d
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── ai2d.yaml
    │   │   │   ├── upload_ai2d.py
    │   │   │   └── utils.py
    │   │   ├── chartqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── chartqa.yaml
    │   │   │   ├── upload_chartqa.py
    │   │   │   └── utils.py
    │   │   ├── cmmmu
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _cmmmu.yaml
    │   │   │   ├── _default_template_cmmmu_yaml
    │   │   │   ├── cmmmu_test.yaml
    │   │   │   ├── cmmmu_val.yaml
    │   │   │   └── utils.py
    │   │   ├── coco_cap
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── coco2014_cap.yaml
    │   │   │   ├── coco2014_cap_test.yaml
    │   │   │   ├── coco2014_cap_val.yaml
    │   │   │   ├── coco2017_cap.yaml
    │   │   │   ├── coco2017_cap_test.yaml
    │   │   │   ├── coco2017_cap_val.yaml
    │   │   │   ├── coco_cap.yaml
    │   │   │   └── utils.py
    │   │   ├── conbench
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── conbench.yaml
    │   │   │   └── utils.py
    │   │   ├── cvrr
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _cvrr.yaml
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── cvrr_fine_grained_action_understanding.yaml
    │   │   │   ├── cvrr_interpretation_of_social_context.yaml
    │   │   │   ├── cvrr_interpretation_of_visual_context.yaml
    │   │   │   ├── cvrr_multiple_actions_in_a_single_video.yaml
    │   │   │   ├── cvrr_non_existent_actions_with_existent_scene_depictions.yaml
    │   │   │   ├── cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml
    │   │   │   ├── cvrr_object_instance_count.yaml
    │   │   │   ├── cvrr_partial_actions.yaml
    │   │   │   ├── cvrr_time_order_understanding.yaml
    │   │   │   ├── cvrr_understanding_emotional_context.yaml
    │   │   │   ├── cvrr_unusual_and_physically_anomalous_activities.yaml
    │   │   │   └── utils.py
    │   │   ├── docvqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_docvqa_yaml
    │   │   │   ├── docvqa.yaml
    │   │   │   ├── docvqa_test.yaml
    │   │   │   ├── docvqa_val.yaml
    │   │   │   └── utils.py
    │   │   ├── egoschema
    │   │   │   ├── README.md
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── egoschema.yaml
    │   │   │   ├── egoschema_mcppl.yaml
    │   │   │   ├── egoschema_subset.yaml
    │   │   │   ├── egoschema_subset_mcppl.yaml
    │   │   │   └── utils.py
    │   │   ├── ferret
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── ferret.yaml
    │   │   │   ├── rule.json
    │   │   │   └── utils.py
    │   │   ├── flickr30k
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── flickr30k.yaml
    │   │   │   ├── flickr30k_test.yaml
    │   │   │   └── utils.py
    │   │   ├── gqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── gqa.yaml
    │   │   │   └── utils.py
    │   │   ├── hallusion_bench
    │   │   │   ├── __pycache__
    │   │   │   │   ├── evaluate_hb.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── evaluate_hb.py
    │   │   │   ├── hallusion_bench_image.yaml
    │   │   │   └── utils.py
    │   │   ├── iconqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_docvqa_yaml
    │   │   │   ├── iconqa.yaml
    │   │   │   ├── iconqa_test.yaml
    │   │   │   ├── iconqa_val.yaml
    │   │   │   └── utils.py
    │   │   ├── infovqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_infovqa_yaml
    │   │   │   ├── infovqa.yaml
    │   │   │   ├── infovqa_test.yaml
    │   │   │   ├── infovqa_val.yaml
    │   │   │   └── utils.py
    │   │   ├── internal_eval
    │   │   │   ├── __pycache__
    │   │   │   │   ├── d170_cn_utils.cpython-310.pyc
    │   │   │   │   ├── d170_en_utils.cpython-310.pyc
    │   │   │   │   ├── dc100_en_utils.cpython-310.pyc
    │   │   │   │   ├── dc200_cn_utils.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_internal_eval_yaml
    │   │   │   ├── d170_cn.yaml
    │   │   │   ├── d170_cn_utils.py
    │   │   │   ├── d170_en.yaml
    │   │   │   ├── d170_en_utils.py
    │   │   │   ├── dc100_en.yaml
    │   │   │   ├── dc100_en_utils.py
    │   │   │   ├── dc200_cn.yaml
    │   │   │   ├── dc200_cn_utils.py
    │   │   │   ├── internal_eval.yaml
    │   │   │   └── utils.py
    │   │   ├── llava-bench-coco
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── llava-bench-coco.yaml
    │   │   │   ├── rule.json
    │   │   │   └── utils.py
    │   │   ├── llava-in-the-wild
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── llava-in-the-wild.yaml
    │   │   │   ├── rule.json
    │   │   │   └── utils.py
    │   │   ├── llava_wilder
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_wilder_yaml
    │   │   │   ├── llava_wilder_full.yaml
    │   │   │   ├── llava_wilder_medium.yaml
    │   │   │   ├── llava_wilder_small.yaml
    │   │   │   └── utils.py
    │   │   ├── longvideobench
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── longvideobench_test_v.yaml
    │   │   │   ├── longvideobench_val_v.yaml
    │   │   │   └── utils.py
    │   │   ├── mathverse
    │   │   │   ├── __pycache__
    │   │   │   │   ├── mathverse_evals.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── mathverse.yaml
    │   │   │   ├── mathverse_evals.py
    │   │   │   ├── mathverse_testmini.yaml
    │   │   │   ├── mathverse_testmini_text_dominant.yaml
    │   │   │   ├── mathverse_testmini_text_lite.yaml
    │   │   │   ├── mathverse_testmini_text_only.yaml
    │   │   │   ├── mathverse_testmini_vision_dominant.yaml
    │   │   │   ├── mathverse_testmini_vision_intensive.yaml
    │   │   │   ├── mathverse_testmini_vision_only.yaml
    │   │   │   └── utils.py
    │   │   ├── mathvista
    │   │   │   ├── __pycache__
    │   │   │   │   ├── mathvista_evals.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── mathvista.yaml
    │   │   │   ├── mathvista_evals.py
    │   │   │   ├── mathvista_test.yaml
    │   │   │   ├── mathvista_testmini.yaml
    │   │   │   └── utils.py
    │   │   ├── mlvu
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── mlvu.yaml
    │   │   │   └── utils.py
    │   │   ├── mmbench
    │   │   │   ├── __pycache__
    │   │   │   │   ├── cc_utils.cpython-310.pyc
    │   │   │   │   ├── cn_utils.cpython-310.pyc
    │   │   │   │   ├── en_utils.cpython-310.pyc
    │   │   │   │   └── mmbench_evals.cpython-310.pyc
    │   │   │   ├── _default_template_mmbench_cn_yaml
    │   │   │   ├── _default_template_mmbench_en_yaml
    │   │   │   ├── cc_utils.py
    │   │   │   ├── cn_utils.py
    │   │   │   ├── en_utils.py
    │   │   │   ├── mmbench.yaml
    │   │   │   ├── mmbench_cc.yaml
    │   │   │   ├── mmbench_cn.yaml
    │   │   │   ├── mmbench_cn_dev.yaml
    │   │   │   ├── mmbench_cn_test.yaml
    │   │   │   ├── mmbench_en.yaml
    │   │   │   ├── mmbench_en_dev.yaml
    │   │   │   ├── mmbench_en_test.yaml
    │   │   │   └── mmbench_evals.py
    │   │   ├── mme
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── mme.yaml
    │   │   │   └── utils.py
    │   │   ├── mmmu
    │   │   │   ├── __pycache__
    │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   └── utils_group_img.cpython-310.pyc
    │   │   │   ├── arial.ttf
    │   │   │   ├── mmmu.yaml
    │   │   │   ├── mmmu_group_img.yaml
    │   │   │   ├── mmmu_group_img_test.yaml
    │   │   │   ├── mmmu_group_img_val.yaml
    │   │   │   ├── mmmu_test.yaml
    │   │   │   ├── mmmu_val.yaml
    │   │   │   ├── utils.py
    │   │   │   └── utils_group_img.py
    │   │   ├── mmupd
    │   │   │   ├── __pycache__
    │   │   │   │   ├── mmupd_evals.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_mmupd_yaml
    │   │   │   ├── mmaad_base.yaml
    │   │   │   ├── mmaad_instruction.yaml
    │   │   │   ├── mmaad_option.yaml
    │   │   │   ├── mmiasd_base.yaml
    │   │   │   ├── mmiasd_instruction.yaml
    │   │   │   ├── mmiasd_option.yaml
    │   │   │   ├── mmivqd_base.yaml
    │   │   │   ├── mmivqd_instruction.yaml
    │   │   │   ├── mmivqd_option.yaml
    │   │   │   ├── mmupd.yaml
    │   │   │   ├── mmupd_base.yaml
    │   │   │   ├── mmupd_evals.py
    │   │   │   ├── mmupd_instruction.yaml
    │   │   │   ├── mmupd_option.yaml
    │   │   │   └── utils.py
    │   │   ├── mmvet
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── mmvet.yaml
    │   │   │   └── utils.py
    │   │   ├── multidocvqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── multidocvqa.yaml
    │   │   │   ├── multidocvqa_test.yaml
    │   │   │   ├── multidocvqa_val.yaml
    │   │   │   └── utils.py
    │   │   ├── multilingual-llava-bench-in-the-wild
    │   │   │   ├── README.md
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template.yaml
    │   │   │   ├── arabic_llava_in_the_wild.yaml
    │   │   │   ├── bengali_llava_in_the_wild.yaml
    │   │   │   ├── chinese_llava_in_the_wild.yaml
    │   │   │   ├── french_llava_in_the_wild.yaml
    │   │   │   ├── hindi_llava_in_the_wild.yaml
    │   │   │   ├── japanese_llava_in_the_wild.yaml
    │   │   │   ├── rule.json
    │   │   │   ├── russian_llava_in_the_wild.yaml
    │   │   │   ├── spanish_llava_in_the_wild.yaml
    │   │   │   ├── urdu_llava_in_the_wild.yaml
    │   │   │   └── utils.py
    │   │   ├── mvbench
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template.yaml
    │   │   │   ├── mvbench.yaml
    │   │   │   ├── mvbench_action_antonym.yaml
    │   │   │   ├── mvbench_action_count.yaml
    │   │   │   ├── mvbench_action_localization.yaml
    │   │   │   ├── mvbench_action_prediction.yaml
    │   │   │   ├── mvbench_action_sequence.yaml
    │   │   │   ├── mvbench_character_order.yaml
    │   │   │   ├── mvbench_counterfactual_inference.yaml
    │   │   │   ├── mvbench_egocentric_navigation.yaml
    │   │   │   ├── mvbench_episodic_reasoning.yaml
    │   │   │   ├── mvbench_fine_grained_action.yaml
    │   │   │   ├── mvbench_fine_grained_pose.yaml
    │   │   │   ├── mvbench_moving_attribute.yaml
    │   │   │   ├── mvbench_moving_count.yaml
    │   │   │   ├── mvbench_moving_direction.yaml
    │   │   │   ├── mvbench_object_existence.yaml
    │   │   │   ├── mvbench_object_interaction.yaml
    │   │   │   ├── mvbench_object_shuffle.yaml
    │   │   │   ├── mvbench_scene_transition.yaml
    │   │   │   ├── mvbench_state_change.yaml
    │   │   │   ├── mvbench_unexpected_action.yaml
    │   │   │   └── utils.py
    │   │   ├── nextqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── nextqa.yaml
    │   │   │   ├── nextqa_mc_test.yaml
    │   │   │   ├── nextqa_oe_test.yaml
    │   │   │   ├── nextqa_oe_val.yaml
    │   │   │   ├── stopwords.csv
    │   │   │   └── utils.py
    │   │   ├── nocaps
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_nocaps_yaml
    │   │   │   ├── nocaps.yaml
    │   │   │   ├── nocaps_test.yaml
    │   │   │   ├── nocaps_val.yaml
    │   │   │   └── utils.py
    │   │   ├── ocrbench
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── ocrbench.yaml
    │   │   │   ├── upload_ocrbench.py
    │   │   │   └── utils.py
    │   │   ├── ok_vqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_vqa_yaml
    │   │   │   ├── _generate_config.py
    │   │   │   ├── _ok_vqa.yaml
    │   │   │   ├── ok_vqa_val2014.yaml
    │   │   │   └── utils.py
    │   │   ├── olympiadbench
    │   │   │   ├── __pycache__
    │   │   │   │   ├── cn_utils.cpython-310.pyc
    │   │   │   │   ├── en_utils.cpython-310.pyc
    │   │   │   │   └── olympiadbench_evals.cpython-310.pyc
    │   │   │   ├── cn_utils.py
    │   │   │   ├── en_utils.py
    │   │   │   ├── olympiadbench.yaml
    │   │   │   ├── olympiadbench_evals.py
    │   │   │   ├── olympiadbench_test_cn.yaml
    │   │   │   └── olympiadbench_test_en.yaml
    │   │   ├── perceptiontest
    │   │   │   ├── test
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   │   ├── _default_template_yaml
    │   │   │   │   ├── perceptiontest_mc.yaml
    │   │   │   │   ├── perceptiontest_mcppl.yaml
    │   │   │   │   └── utils.py
    │   │   │   └── val
    │   │   │   │   ├── __pycache__
    │   │   │   │       └── utils.cpython-310.pyc
    │   │   │   │   ├── _default_template_yaml
    │   │   │   │   ├── perceptiontest_mc.yaml
    │   │   │   │   ├── perceptiontest_mcppl.yaml
    │   │   │   │   └── utils.py
    │   │   ├── pope
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── pope.yaml
    │   │   │   ├── pope_adv.yaml
    │   │   │   ├── pope_full.yaml
    │   │   │   ├── pope_pop.yaml
    │   │   │   ├── pope_random.yaml
    │   │   │   └── utils.py
    │   │   ├── realworldqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── realworldqa.yaml
    │   │   │   └── utils.py
    │   │   ├── refcoco+
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_bbox_yaml
    │   │   │   ├── _default_template_seg_yaml
    │   │   │   ├── _generate_config.py
    │   │   │   ├── _refcoco.yaml
    │   │   │   ├── refcoco+_bbox_testA.yaml
    │   │   │   ├── refcoco+_bbox_testB.yaml
    │   │   │   ├── refcoco+_bbox_val.yaml
    │   │   │   ├── refcoco+_seg_testA.yaml
    │   │   │   ├── refcoco+_seg_testB.yaml
    │   │   │   ├── refcoco+_seg_val.yaml
    │   │   │   └── utils.py
    │   │   ├── refcoco
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_bbox_yaml
    │   │   │   ├── _default_template_seg_yaml
    │   │   │   ├── _generate_config.py
    │   │   │   ├── _refcoco.yaml
    │   │   │   ├── refcoco_bbox_test.yaml
    │   │   │   ├── refcoco_bbox_testA.yaml
    │   │   │   ├── refcoco_bbox_testB.yaml
    │   │   │   ├── refcoco_bbox_val.yaml
    │   │   │   ├── refcoco_seg_test.yaml
    │   │   │   ├── refcoco_seg_testA.yaml
    │   │   │   ├── refcoco_seg_testB.yaml
    │   │   │   ├── refcoco_seg_val.yaml
    │   │   │   └── utils.py
    │   │   ├── refcocog
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_bbox_yaml
    │   │   │   ├── _default_template_seg_yaml
    │   │   │   ├── _generate_config.py
    │   │   │   ├── _refcoco.yaml
    │   │   │   ├── refcocog_bbox_test.yaml
    │   │   │   ├── refcocog_bbox_val.yaml
    │   │   │   ├── refcocog_seg_test.yaml
    │   │   │   ├── refcocog_seg_val.yaml
    │   │   │   └── utils.py
    │   │   ├── scienceqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── scienceqa.yaml
    │   │   │   ├── scienceqa_full.yaml
    │   │   │   ├── scienceqa_img.yaml
    │   │   │   └── utils.py
    │   │   ├── screenspot
    │   │   │   ├── README.md
    │   │   │   ├── __pycache__
    │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   └── utils_rec.cpython-310.pyc
    │   │   │   ├── _default_template_rec_yaml
    │   │   │   ├── _default_template_reg_yaml
    │   │   │   ├── _screenspot.yaml
    │   │   │   ├── screenspot_rec_test.yaml
    │   │   │   ├── screenspot_reg_test.yaml
    │   │   │   ├── utils.py
    │   │   │   └── utils_rec.py
    │   │   ├── seedbench
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── seedbench.yaml
    │   │   │   ├── seedbench_ppl.yaml
    │   │   │   └── utils.py
    │   │   ├── seedbench_2
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── seedbench_2.yaml
    │   │   │   └── utils.py
    │   │   ├── stvqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── stvqa.yaml
    │   │   │   └── utils.py
    │   │   ├── synthdog
    │   │   │   ├── __pycache__
    │   │   │   │   ├── donut_evaluator.cpython-310.pyc
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── donut_evaluator.py
    │   │   │   ├── synthdog.yaml
    │   │   │   ├── synthdog_en.yaml
    │   │   │   ├── synthdog_zh.yaml
    │   │   │   └── utils.py
    │   │   ├── textcaps
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_textcaps_yaml
    │   │   │   ├── textcaps.yaml
    │   │   │   ├── textcaps_test.yaml
    │   │   │   ├── textcaps_train.yaml
    │   │   │   ├── textcaps_val.yaml
    │   │   │   └── utils.py
    │   │   ├── textvqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_textvqa_yaml
    │   │   │   ├── _textvqa.yaml
    │   │   │   ├── textvqa_test.yaml
    │   │   │   ├── textvqa_val.yaml
    │   │   │   └── utils.py
    │   │   ├── vatex
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _vatex.yaml
    │   │   │   ├── utils.py
    │   │   │   ├── vatex_test.yaml
    │   │   │   └── vatex_val_zh.yaml
    │   │   ├── vcr_wiki
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_vcr_yaml
    │   │   │   ├── utils.py
    │   │   │   ├── vcr_wiki_en_easy.yaml
    │   │   │   ├── vcr_wiki_en_easy_100.yaml
    │   │   │   ├── vcr_wiki_en_easy_500.yaml
    │   │   │   ├── vcr_wiki_en_hard.yaml
    │   │   │   ├── vcr_wiki_en_hard_100.yaml
    │   │   │   ├── vcr_wiki_en_hard_500.yaml
    │   │   │   ├── vcr_wiki_zh_easy.yaml
    │   │   │   ├── vcr_wiki_zh_easy_100.yaml
    │   │   │   ├── vcr_wiki_zh_easy_500.yaml
    │   │   │   ├── vcr_wiki_zh_hard.yaml
    │   │   │   ├── vcr_wiki_zh_hard_100.yaml
    │   │   │   └── vcr_wiki_zh_hard_500.yaml
    │   │   ├── video_detail_description
    │   │   │   ├── README.md
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── utils.py
    │   │   │   └── video_detail_description.yaml
    │   │   ├── videochatgpt
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── _videochatgpt.yaml
    │   │   │   ├── utils.py
    │   │   │   ├── videochatgpt_consistency.yaml
    │   │   │   ├── videochatgpt_generic.yaml
    │   │   │   └── videochatgpt_temporal.yaml
    │   │   ├── videomme
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── utils.py
    │   │   │   ├── videomme.yaml
    │   │   │   └── videomme_w_subtitle.yaml
    │   │   ├── videomme_fix
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── utils.py
    │   │   │   ├── videomme.yaml
    │   │   │   └── videomme_w_subtitle.yaml
    │   │   ├── vizwiz_vqa
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_vqa_yaml
    │   │   │   ├── _generate_config.py
    │   │   │   ├── _vizwiz_vqa.yaml
    │   │   │   ├── utils.py
    │   │   │   ├── vizwiz_vqa_test.yaml
    │   │   │   └── vizwiz_vqa_val.yaml
    │   │   ├── vqav2
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_vqav2_yaml
    │   │   │   ├── _vqav2.yaml
    │   │   │   ├── utils.py
    │   │   │   ├── vqav2_test.yaml
    │   │   │   └── vqav2_val.yaml
    │   │   ├── websrc
    │   │   │   ├── README.md
    │   │   │   ├── __pycache__
    │   │   │   │   └── utils.cpython-310.pyc
    │   │   │   ├── utils.py
    │   │   │   ├── websrc.yaml
    │   │   │   ├── websrc_test.yaml
    │   │   │   └── websrc_val.yaml
    │   │   ├── worldqa
    │   │   │   ├── __pycache__
    │   │   │   │   ├── utils.cpython-310.pyc
    │   │   │   │   └── worldqa_mc_evaluator.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── utils.py
    │   │   │   ├── worldqa.yaml
    │   │   │   ├── worldqa_generation.yaml
    │   │   │   ├── worldqa_mc.yaml
    │   │   │   ├── worldqa_mc_evaluator.py
    │   │   │   └── worldqa_mcppl.yaml
    │   │   └── youcook2
    │   │   │   ├── __pycache__
    │   │   │       └── utils.cpython-310.pyc
    │   │   │   ├── _default_template_yaml
    │   │   │   ├── utils.py
    │   │   │   └── youcook2_val.yaml
    │   └── utils.py
    ├── pyproject.toml
    └── setup.py
├── oryx
    ├── __init__.py
    ├── constants.py
    ├── conversation.py
    ├── mm_utils.py
    ├── model
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-310.pyc
    │   │   ├── builder.cpython-310.pyc
    │   │   └── oryx_arch.cpython-310.pyc
    │   ├── builder.py
    │   ├── language_model
    │   │   ├── __pycache__
    │   │   │   ├── oryx_llama.cpython-310.pyc
    │   │   │   └── oryx_qwen.cpython-310.pyc
    │   │   ├── oryx_llama.py
    │   │   └── oryx_qwen.py
    │   ├── multimodal_encoder
    │   │   ├── __pycache__
    │   │   │   ├── builder.cpython-310.pyc
    │   │   │   └── oryx_vit.cpython-310.pyc
    │   │   ├── builder.py
    │   │   └── oryx_vit.py
    │   ├── multimodal_projector
    │   │   ├── __pycache__
    │   │   │   └── builder.cpython-310.pyc
    │   │   └── builder.py
    │   ├── multimodal_resampler
    │   │   ├── __pycache__
    │   │   │   ├── builder.cpython-310.pyc
    │   │   │   ├── masked_drop.cpython-310.pyc
    │   │   │   ├── perceiver.cpython-310.pyc
    │   │   │   ├── qformer.cpython-310.pyc
    │   │   │   ├── spatial_pool.cpython-310.pyc
    │   │   │   └── vlm_attention.cpython-310.pyc
    │   │   ├── builder.py
    │   │   ├── masked_drop.py
    │   │   ├── perceiver.py
    │   │   ├── qformer.py
    │   │   ├── spatial_pool.py
    │   │   └── vlm_attention.py
    │   └── oryx_arch.py
    ├── train
    │   ├── llama_flash_attn_monkey_patch.py
    │   ├── oryx_trainer.py
    │   ├── train.py
    │   └── train_mem.py
    └── utils.py
├── pyproject.toml
└── scripts
    ├── eval_image.sh
    ├── eval_video.sh
    ├── train_oryx_34b.sh
    ├── train_oryx_7b.sh
    ├── zero2.json
    ├── zero3.json
    └── zero3_offload.json


/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info/
2 | __pycache__/
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Zuyan Liu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/inference.sh:
--------------------------------------------------------------------------------
1 | export LOWRES_RESIZE=384x32  # settings are exported so the Python process started below inherits them
2 | export VIDEO_RESIZE="0x64"  # NOTE(review): the "AxB" value format is parsed outside this script - confirm its meaning there
3 | export HIGHRES_BASE="0x32"
4 | export MAXRES=1536  # NOTE(review): presumably the image resolution bounds (max/min) - confirm against the code that reads them
5 | export MINRES=0
6 | export VIDEO_MAXRES=480  # NOTE(review): presumably the video counterparts of MAXRES/MINRES - confirm
7 | export VIDEO_MINRES=288
8 | # Run the inference entry point with the environment configured above.
9 | python inference.py


--------------------------------------------------------------------------------
/inference_34b.sh:
--------------------------------------------------------------------------------
 1 | export LOWRES_RESIZE=384x32  # settings are exported so the Python process started below inherits them
 2 | export VIDEO_RESIZE="0x64"  # NOTE(review): the "AxB" value format is parsed outside this script - confirm its meaning there
 3 | export HIGHRES_BASE="0x32"
 4 | export MAXRES=1536  # NOTE(review): presumably the image resolution bounds (max/min) - confirm against the code that reads them
 5 | export MINRES=0
 6 | export VIDEO_MAXRES=480  # NOTE(review): presumably the video counterparts of MAXRES/MINRES - confirm
 7 | export VIDEO_MINRES=288
 8 | # The resolution settings above are identical to those in inference.sh.
 9 | export EVAL_LARGE=1  # the one extra variable relative to inference.sh; NOTE(review): presumably selects the larger (34B) model - confirm in inference.py
10 | # Run the inference entry point with the environment configured above.
11 | python inference.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/__init__.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__init__.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/filter.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/filter.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/instance.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/instance.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/metrics.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/metrics.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/model.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/model.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/registry.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/registry.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/samplers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/samplers.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/__pycache__/task.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/api/__pycache__/task.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/api/instance.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import Literal, Tuple
 3 | 
 4 | 
 5 | @dataclass
 6 | class Instance:
 7 |     request_type: Literal["loglikelihood", "generate_until"]
 8 |     arguments: tuple
 9 |     idx: int
10 |     metadata: Tuple[str, int, int] = field(default_factory=lambda: (None, None, None))  # TODO: better typehints here
11 |     resps: list = field(default_factory=list)
12 |     filtered_resps: dict = field(default_factory=dict)
13 | 
14 |     # initialized after init
15 |     task_name: str = None
16 |     doc_id: str = None
17 |     repeats: str = None
18 |     doc: dict = None
19 | 
20 |     def __post_init__(self) -> None:
21 |         # unpack metadata field
22 |         self.task_name, self.doc_id, self.repeats = self.metadata
23 | 
24 |     @property
25 |     def args(self):
26 |         """
27 |         Returns (string,) where `string` is the string to calculate loglikelihood over
28 |         """
29 |         return self.arguments if isinstance(self.arguments, tuple) else (self.arguments,)
30 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/filters/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/filters/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/filters/__pycache__/extraction.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/filters/__pycache__/extraction.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/filters/__pycache__/selection.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/filters/__pycache__/selection.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/filters/__pycache__/transformation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/filters/__pycache__/transformation.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/filters/decontamination.py:
--------------------------------------------------------------------------------
 1 | from lmms_eval.api.filter import Filter
 2 | 
 3 | 
 4 | class DecontaminationFilter(Filter):
 5 |     """
 6 |     A filter which evaluates
 7 |     """
 8 | 
 9 |     name = "track_decontamination"
10 | 
11 |     def __init__(self, path) -> None:
12 |         """
13 | 
14 |         TODO: make sure only ever run one time on the train set (should this be cached as a class var? keyed by value for "path").
15 |         should further cache result on a given (task_name, doc_id)
16 |         """
17 |         self._decontam_results = None
18 | 
19 |     def apply(self, resps, docs) -> None:
20 |         """
21 |         Return {"no_contamination", "only_contamination"} keys for the 2 different subsets
22 |         """
23 |         pass
24 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import hf_transfer
 3 | 
 4 | os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 5 | 
 6 | AVAILABLE_MODELS = {
 7 |     "oryx": "Oryx",
 8 |     "oryx_image": "OryxImage",
 9 |     "from_log": "FromLog",
10 | }
11 | 
12 | for model_name, model_class in AVAILABLE_MODELS.items():
13 |     try:
14 |         exec(f"from .{model_name} import {model_class}")
15 |     except ImportError as e:
16 |         print(model_name, e)
17 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/__pycache__/from_log.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/__pycache__/from_log.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/__pycache__/oryx.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/__pycache__/oryx.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/__pycache__/oryx_image.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/__pycache__/oryx_image.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/model_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/model_utils/__init__.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/model_utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/model_utils/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/model_utils/__pycache__/load_video.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/model_utils/__pycache__/load_video.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import VideoChatGPTLlamaForCausalLM
2 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/constants.py:
--------------------------------------------------------------------------------
# Heart-beat timing constants.
# NOTE(review): units are not stated in this file; presumably seconds — confirm against the code that reads them.
CONTROLLER_HEART_BEAT_EXPIRATION = 30
WORKER_HEART_BEAT_INTERVAL = 15

# NOTE(review): presumably the directory log files are written to ("." = current working directory) — confirm.
LOGDIR = "."


# Special token strings used for video inputs.
DEFAULT_VIDEO_TOKEN = "<video>"
DEFAULT_VIDEO_PATCH_TOKEN = "<vid_patch>"
DEFAULT_VID_START_TOKEN = "<vid_start>"
DEFAULT_VID_END_TOKEN = "<vid_end>"
12 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/models/video_chatgpt/eval/__init__.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/model/__init__.py:
--------------------------------------------------------------------------------
1 | from lmms_eval.models.video_chatgpt.model.video_chatgpt import VideoChatGPTLlamaForCausalLM, VideoChatGPTConfig
2 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/model/consolidate.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Usage:
 3 | python3 -m lmms_eval.models.video_chatgpt.model.consolidate --src ~/model_weights/llava-7b --dst ~/model_weights/llava-7b_consolidate
 4 | """
 5 | 
 6 | import argparse
 7 | 
 8 | import torch
 9 | from transformers import AutoTokenizer, AutoModelForCausalLM
10 | from lmms_eval.models.video_chatgpt.model import *
11 | 
12 | 
def consolidate_ckpt(src_path, dst_path):
    """Load the model and tokenizer found at `src_path` and save both to `dst_path`.

    The model is loaded in float16 with `low_cpu_mem_usage=True`.
    """
    print("Loading model")
    load_options = {"torch_dtype": torch.float16, "low_cpu_mem_usage": True}
    model = AutoModelForCausalLM.from_pretrained(src_path, **load_options)
    tokenizer = AutoTokenizer.from_pretrained(src_path)
    # Model first, then tokenizer, both into the same destination directory.
    for artifact in (model, tokenizer):
        artifact.save_pretrained(dst_path)
19 | 
20 | 
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory string options.
    cli = argparse.ArgumentParser()
    for flag in ("--src", "--dst"):
        cli.add_argument(flag, type=str, required=True)

    options = cli.parse_args()

    consolidate_ckpt(options.src, options.dst)
29 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/models/video_chatgpt/model/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from lmms_eval.models.video_chatgpt.model import *
 3 | from transformers import StoppingCriteria
 4 | 
 5 | 
 6 | class KeywordsStoppingCriteria(StoppingCriteria):
 7 |     def __init__(self, keywords, tokenizer, input_ids):
 8 |         self.keywords = keywords
 9 |         self.keyword_ids = [tokenizer(keyword).input_ids for keyword in keywords]
10 |         self.keyword_ids = [keyword_id[0] for keyword_id in self.keyword_ids if type(keyword_id) is list and len(keyword_id) == 1]
11 |         self.tokenizer = tokenizer
12 |         self.start_len = None
13 |         self.input_ids = input_ids
14 | 
15 |     def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
16 |         if self.start_len is None:
17 |             self.start_len = self.input_ids.shape[1]
18 |         else:
19 |             for keyword_id in self.keyword_ids:
20 |                 if output_ids[0, -1] == keyword_id:
21 |                     return True
22 |             outputs = self.tokenizer.batch_decode(output_ids[:, self.start_len :], skip_special_tokens=True)[0]
23 |             for keyword in self.keywords:
24 |                 if keyword in outputs:
25 |                     return True
26 |         return False
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/file_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/file_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/video_loader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/video_loader.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/vqa_eval_metric.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/_task_utils/__pycache__/vqa_eval_metric.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/file_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
def generate_submission_file(file_name, args, subpath="submissions"):
    """Return the absolute path of `file_name` inside `<args.output_path>/<subpath>`.

    The directory is created if it does not exist; the file itself is not touched.
    """
    submission_dir = os.path.join(args.output_path, subpath)
    os.makedirs(submission_dir, exist_ok=True)
    return os.path.abspath(os.path.join(submission_dir, file_name))
9 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/gpt_eval_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/_task_utils/gpt_eval_utils.py


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/_task_utils/video_loader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def get_cache_dir(config, sub_dir="videos"):
 5 |     HF_HOME = os.environ["HF_HOME"]
 6 |     cache_dir = config["dataset_kwargs"]["cache_dir"]
 7 |     cache_dir = os.path.join(HF_HOME, cache_dir)
 8 |     cache_dir = os.path.join(cache_dir, sub_dir)
 9 |     return cache_dir
10 | 
11 | 
def _get_video_file(prefix: str, video_name: str, suffix: str):
    """Join `prefix` and `video_name`, appending `.suffix` unless the name already ends with it.

    `video_name` may be any object; it is converted with `str()`. The extension
    test includes the dot, so a name such as "xmp4" still receives ".mp4".
    """
    video_name = str(video_name)
    if suffix:
        # Accept the suffix with or without its leading dot.
        extension = suffix if suffix.startswith(".") else f".{suffix}"
        if not video_name.endswith(extension):
            video_name = f"{video_name}{extension}"
    return os.path.join(prefix, video_name)


def get_video(prefix: str, video_name: str, suffix: str = "mp4"):
    """Return the absolute path of an existing video file under `prefix`.

    The suffix is tried as given, then upper-cased, then lower-cased.

    Raises:
        FileNotFoundError: if none of the candidate paths exists.
    """
    candidates = [os.path.abspath(_get_video_file(prefix, video_name, variant)) for variant in (suffix, suffix.upper(), suffix.lower())]
    # Drop repeated candidates (e.g. suffix already lower-case) while keeping the probing order.
    tried = list(dict.fromkeys(candidates))
    for video_path in tried:
        if os.path.exists(video_path):
            return video_path
    raise FileNotFoundError(f"Tried both {tried} but none of them exist, please check")
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/activitynetqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/activitynetqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/activitynetqa/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ActivityNetQA
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   force_download: False
 6 |   local_files_only: False
 7 |   cache_dir: activitynetqa
 8 | model_specific_prompt_kwargs:
 9 |   default:
10 |     pre_prompt: ""
11 |     post_prompt: " Answer the question using a single word or phrase."
12 | 
13 | metadata:
14 |   version: 0.0
15 |   gpt_eval_model_name: gpt-3.5-0613


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/activitynetqa/activitynetqa_generation.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Generation"
 2 | task: "activitynetqa"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.activitynetqa_doc_to_visual
 6 | doc_to_text: !function utils.activitynetqa_doc_to_text
 7 | doc_to_target: !function utils.activitynetqa_doc_to_answer
 8 | process_results: !function utils.activitynetqa_process_results # gpt eval here for each QA pairs
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.activitynetqa_aggregate_score # parse scores from each QA pairs
12 |     higher_is_better: true
13 |   - metric: gpt_eval_accuracy
14 |     aggregation: !function utils.activitynetqa_aggregate_accuracy # parse accuracy from each QA pairs
15 |     higher_is_better: true
16 | 
17 | include: _default_template_yaml
18 | 
19 | generation_kwargs:
20 |   until:
21 |     - "ASSISTANT:"
22 |   image_aspect_ratio: original
23 |   max_new_tokens: 64
24 |   temperature: 0
25 |   top_p: 1.0
26 |   num_beams: 1
27 |   do_sample: false
28 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ai2d/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/ai2d/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/chartqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/chartqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/chartqa/chartqa.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ChartQA
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "chartqa"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.chartqa_doc_to_visual
 8 | doc_to_text: !function utils.chartqa_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   max_new_tokens: 16
12 |   temperature: 0
13 |   do_sample: False
14 | process_results: !function utils.chartqa_process_results
15 | metric_list:
16 |   - metric: relaxed_overall
17 |     aggregation: mean
18 |     higher_is_better: true
19 |   - metric: relaxed_human_split
20 |     aggregation: mean
21 |     higher_is_better: true
22 |   - metric: relaxed_augmented_split
23 |     aggregation: mean
24 |     higher_is_better: true
25 | metadata:
26 |   - version: 0.0
27 | model_specific_prompt_kwargs:
28 |   default:
29 |     pre_prompt: ""
30 |     post_prompt: "\nAnswer the question with a single word."
31 |   qwen_vl:
32 |     pre_prompt: ""
33 |     post_prompt: " Answer:"
34 | 
35 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cmmmu/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/cmmmu/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cmmmu/_cmmmu.yaml:
--------------------------------------------------------------------------------
1 | group: cmmmu
2 | task:
3 | - cmmmu_val
4 | - cmmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/CMMMU
2 | output_type: generate_until
3 | doc_to_visual: !function utils.cmmmu_doc_to_visual
4 | doc_to_text: !function utils.cmmmu_doc_to_text
5 | doc_to_target: "answer"
6 | generation_kwargs:
7 |   max_new_tokens: 16
8 |   image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cmmmu/cmmmu_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "cmmmu_test"
 2 | test_split: test
 3 | # The return value of process_results will be used by metrics
 4 | process_results: !function utils.cmmmu_process_test_results_for_submission
 5 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
 6 | metric_list:
 7 |   - metric: submission
 8 |     aggregation: !function utils.cmmmu_test_aggregate_results_for_submission
 9 |     higher_is_better: false
10 | metadata:
11 |   - version: 0.0
12 | include: _default_template_cmmmu_yaml
13 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cmmmu/cmmmu_val.yaml:
--------------------------------------------------------------------------------
 1 | task: "cmmmu_val"
 2 | test_split: val
 3 | # The return value of process_results will be used by metrics
 4 | process_results: !function utils.cmmmu_process_results
 5 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
 6 | generation_kwargs:
 7 |   max_new_tokens: 16
 8 |   image_aspect_ratio: original
 9 | metric_list:
10 |   - metric: cmmmu_acc
11 |     aggregation: !function utils.cmmmu_aggregate_results
12 |     higher_is_better: true
13 | metadata:
14 |   - version: 0.0
15 | include: _default_template_cmmmu_yaml
16 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/coco_cap/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/coco2014_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2014_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/COCO-Caption
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "coco2014_cap_test"
 5 | group : "coco_caption"
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.coco_doc_to_visual
 9 | doc_to_text: "Provide a one-sentence caption for the provided image."
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.coco_test_process_result
18 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
19 | metric_list:
20 |   - metric: coco_passthrough 
21 |     aggregation : !function utils.coco_test_aggregation_result
22 |     higher_is_better : true
23 | metadata:
24 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/coco2017_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco2017_cap
2 | task:
3 |   - coco2017_cap_val
4 |   - coco2017_cap_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/COCO-Caption2017
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "coco2017_cap_test"
 5 | group : "coco_caption2017"
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.coco_doc_to_visual
 9 | doc_to_text: !function utils.coco_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.coco_test_process_result
18 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
19 | metric_list:
20 |   - metric: coco_passthrough 
21 |     aggregation : !function utils.coco_test_aggregation_result
22 |     higher_is_better : true
23 | metadata:
24 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/coco_cap/coco_cap.yaml:
--------------------------------------------------------------------------------
1 | group : coco_cap
2 | task:
3 |   - coco2014_cap_val
4 |   - coco2014_cap_test
5 |   - coco2017_cap_val
6 |   - coco2017_cap_test
7 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/conbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/conbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/conbench/conbench.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: ConBench/ConBench_D
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "ConBench"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.conbench_doc_to_visual
 8 | doc_to_text: !function utils.conbench_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   max_new_tokens: 1024
12 |   temperature: 0.2
13 |   top_p: 0
14 |   num_beams: 1
15 |   do_sample: True
16 | # The return value of process_results will be used by metrics
17 | process_results: !function utils.conbench_process_results
18 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
19 | metric_list:
20 |   - metric: ConScore_D
21 |     aggregation: !function utils.conbench_aggregate_results
22 |     higher_is_better: true
23 | metadata:
24 |   - version: 0.0
25 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/cvrr/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/_cvrr.yaml:
--------------------------------------------------------------------------------
 1 | group: cvrr
 2 | task:
 3 | - cvrr_continuity_and_object_instance_count
 4 | - cvrr_fine_grained_action_understanding
 5 | - cvrr_interpretation_of_social_context
 6 | - cvrr_interpretation_of_visual_context
 7 | - cvrr_multiple_actions_in_a_single_video
 8 | - cvrr_non_existent_actions_with_existent_scene_depictions
 9 | - cvrr_non_existent_actions_with_non_existent_scene_depictions
10 | - cvrr_partial_actions
11 | - cvrr_time_order_understanding
12 | - cvrr_understanding_emotional_context
13 | - cvrr_unusual_and_physically_anomalous_activities
14 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/CVRR-ES
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: cvrr-es
 6 | model_specific_prompt_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-turbo-0125


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_fine_grained_action_understanding.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "fine_grained_action_understanding"
 2 | task: "cvrr_fine_grained_action_understanding"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_interpretation_of_social_context.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "interpretation_of_social_context"
 2 | task: "cvrr_interpretation_of_social_context"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_interpretation_of_visual_context.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "interpretation_of_visual_context"
 2 | task: "cvrr_interpretation_of_visual_context"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_multiple_actions_in_a_single_video.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "multiple_actions_in_a_single_video"
 2 | task: "cvrr_multiple_actions_in_a_single_video"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_existent_scene_depictions.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "non_existent_actions_with_existent_scene_depictions"
 2 | task: "cvrr_non_existent_actions_with_existent_scene_depictions"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "non_existent_actions_with_non_existent_scene_depictions"
 2 | task: "cvrr_non_existent_actions_with_non_existent_scene_depictions"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "continuity_and_object_instance_count"
 2 | task: "cvrr_continuity_and_object_instance_count"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_partial_actions.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "partial_actions"
 2 | task: "cvrr_partial_actions"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_time_order_understanding.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "time_order_understanding"
 2 | task: "cvrr_time_order_understanding"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_understanding_emotional_context.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "understanding_emotional_context"
 2 | task: "cvrr_understanding_emotional_context"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/cvrr/cvrr_unusual_and_physically_anomalous_activities.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "unusual_and_physically_anomalous_activities"
 2 | task: "cvrr_unusual_and_physically_anomalous_activities"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.cvrr_doc_to_visual
 6 | doc_to_text: !function utils.cvrr_doc_to_text
 7 | doc_to_target: !function utils.cvrr_doc_to_answer
 8 | process_results: !function utils.cvrr_process_results
 9 | metric_list:
10 |   - metric: gpt_eval_accuracy
11 |     aggregation: !function utils.cvrr_aggregate_accuracy
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score
14 |     aggregation: !function utils.cvrr_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/docvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/_default_template_docvqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/DocVQA
 2 | dataset_name: DocVQA
 3 | dataset_kwargs:
 4 |   token: True
 5 | output_type: generate_until
 6 | doc_to_visual: !function utils.docvqa_doc_to_visual
 7 | doc_to_text: !function utils.docvqa_doc_to_text
 8 | doc_to_target: "answers"
 9 | generation_kwargs:
10 |   max_new_tokens: 32
11 |   temperature: 0
12 |   do_sample: False
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "\nAnswer the question using a single word or phrase."
17 |   qwen_vl:
18 |     pre_prompt: ""
19 |     post_prompt: " Answer:"
20 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/docvqa.yaml:
--------------------------------------------------------------------------------
1 | group: docvqa
2 | task:
3 | - docvqa_val
4 | - docvqa_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/docvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_test"
2 | test_split: test
3 | process_results: !function utils.docvqa_test_process_results
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.docvqa_test_aggregate_results
7 |     higher_is_better: true
8 | include: _default_template_docvqa_yaml
9 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/docvqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "docvqa_val"
2 | test_split: validation
3 | metric_list:
4 |   - metric: anls
5 |     aggregation: mean
6 |     higher_is_better: true
7 | include: _default_template_docvqa_yaml
8 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/docvqa/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import logging
 4 | 
 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
 6 | 
 7 | logger = logging.getLogger("lmms-eval")
 8 | 
 9 | 
def docvqa_doc_to_visual(doc):
    """Return the record's image, converted to RGB, wrapped in a list.

    Args:
        doc: Dataset record; ``doc["image"]`` must expose ``convert(mode)``.

    Returns:
        A one-element list holding ``doc["image"].convert("RGB")``.
    """
    rgb_image = doc["image"].convert("RGB")
    return [rgb_image]
12 | 
13 | 
def docvqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the prompt text for one record.

    Args:
        doc: Dataset record; ``doc["question"]`` supplies the question text.
        model_specific_prompt_kwargs: Optional mapping that may carry
            ``"pre_prompt"`` and ``"post_prompt"`` strings. ``None`` or a
            missing key is treated as an empty string.

    Returns:
        The question with the pre-prompt prepended and the post-prompt appended.
    """
    # Tolerate configs that omit the prompt kwargs (or only one of the two
    # keys) instead of failing with TypeError/KeyError.
    prompt_kwargs = model_specific_prompt_kwargs or {}
    question = doc["question"]
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}{question}{post_prompt}"
19 | 
20 | 
def docvqa_test_process_results(doc, results):
    """Package the first prediction for a record under two metric keys.

    Args:
        doc: Dataset record; ``doc["questionId"]`` is coerced with ``int``.
        results: Sequence of model outputs; only ``results[0]`` is used.

    Returns:
        A dict with ``"anls"`` and ``"submission"`` entries, each a separate
        ``{"questionId": <int>, "answer": <prediction>}`` dict.
    """
    prediction = results[0]
    question_id = int(doc["questionId"])
    return {
        metric_name: {"questionId": question_id, "answer": prediction}
        for metric_name in ("anls", "submission")
    }
25 | 
26 | 
def docvqa_test_aggregate_results(results, args):
    """Dump the collected results to a JSON submission file and log its path.

    Args:
        results: JSON-serialisable collection of per-record entries.
        args: Forwarded unchanged to ``generate_submission_file``.

    Returns:
        None.
    """
    submission_path = generate_submission_file("docvqa_test_for_submission.json", args)
    with open(submission_path, "w") as out_file:
        json.dump(results, out_file)
    logger.info(f"Results saved to {submission_path}")
33 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/egoschema/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/egoschema
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: egoschema
6 | model_specific_prompt_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/egoschema.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "GENERATION"
 2 | task: "egoschema"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.egoschema_doc_to_visual
 6 | doc_to_text: !function utils.egoschema_doc_to_text
 7 | doc_to_target: !function utils.egoschema_doc_to_answer
 8 | process_results: !function utils.egoschema_process_results_generation
 9 | metric_list:
10 |   - metric: submission
11 |     aggregation: !function utils.egoschema_aggregate_mc
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/egoschema_mcppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "MC_PPL"
 2 | task: "egoschema_mcppl"
 3 | test_split: test
 4 | output_type: multiple_choice
 5 | doc_to_visual: !function utils.egoschema_doc_to_visual
 6 | doc_to_text: "question"
 7 | doc_to_target: !function utils.egoschema_doc_to_answer
 8 | doc_to_choice: !function utils.egoschema_doc_to_choice
 9 | process_results: !function utils.egoschema_process_results
10 | metric_list:
11 |   - metric: submission
12 |     aggregation: !function utils.egoschema_aggregate_mc_ppl
13 |     higher_is_better: true
14 | include: _default_template_yaml
15 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/egoschema_subset.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Subset"
 2 | task: "egoschema_subset"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.egoschema_doc_to_visual
 6 | doc_to_text: !function utils.egoschema_doc_to_text
 7 | doc_to_target: !function utils.egoschema_doc_to_answer
 8 | process_results: !function utils.egoschema_process_results_generation
 9 | metric_list:
10 |   - metric: submission
11 |     aggregation: !function utils.egoschema_aggregate_mc
12 |     higher_is_better: true
13 |   - metric: score
14 |     aggregation: !function utils.egoschema_aggregate_score
15 |     higher_is_better: true
16 | include: _default_template_yaml
17 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/egoschema/egoschema_subset_mcppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Subset"
 2 | task: "egoschema_subset_mcppl"
 3 | test_split: test
 4 | output_type: multiple_choice
 5 | doc_to_visual: !function utils.egoschema_doc_to_visual
 6 | doc_to_text: "question"
 7 | doc_to_target: !function utils.egoschema_doc_to_answer
 8 | doc_to_choice: !function utils.egoschema_doc_to_choice
 9 | process_results: !function utils.egoschema_process_results
10 | metric_list:
11 |   - metric: submission
12 |     aggregation: !function utils.egoschema_aggregate_mc_ppl
13 |     higher_is_better: true
14 |   - metric: score
15 |     aggregation: !function utils.egoschema_aggregate_score
16 |     higher_is_better: true
17 | include: _default_template_yaml
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ferret/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/ferret/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/flickr30k/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/flickr30k/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/flickr30k/flickr30k.yaml:
--------------------------------------------------------------------------------
1 | group: flickr30k
2 | task:
3 | - flickr30k_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/gqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/gqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/gqa/gqa.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/GQA
 2 | dataset_name: testdev_balanced_instructions
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "gqa"
 6 | test_split: testdev
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.gqa_doc_to_visual
 9 | doc_to_text: !function utils.gqa_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | metric_list:
18 |   - metric: exact_match
19 |     aggregation: mean
20 |     higher_is_better: true
21 |     ignore_case: true
22 |     ignore_punctuation: true
23 | metadata:
24 |   - version: 0.0
25 |   
26 | model_specific_prompt_kwargs:
27 |   default:
28 |     pre_prompt: ""
29 |     post_prompt: "\nAnswer the question using a single word or phrase."
30 |   qwen_vl:
31 |     pre_prompt: ""
32 |     post_prompt: " Answer:"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/gqa/utils.py:
--------------------------------------------------------------------------------
 1 | from datasets import load_dataset
 2 | 
 3 | GQA_RAW_IMAGE_DATASET = None
 4 | GQA_ID2IMAGE = None
 5 | 
 6 | 
 7 | def gqa_doc_to_visual(doc):
 8 |     global GQA_RAW_IMAGE_DATASET
 9 |     global GQA_ID2IMAGE
10 |     if GQA_RAW_IMAGE_DATASET is None:
11 |         GQA_RAW_IMAGE_DATASET = load_dataset("lmms-lab/GQA", "testdev_balanced_images", split="testdev", token=True)
12 |         GQA_ID2IMAGE = {}
13 |         for row in GQA_RAW_IMAGE_DATASET:
14 |             GQA_ID2IMAGE[row["id"]] = row["image"].convert("RGB")
15 |     image = GQA_ID2IMAGE[doc["imageId"]]
16 |     return [image]
17 | 
18 | 
def gqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the prompt text for one record.

    Args:
        doc: Dataset record; ``doc["question"]`` supplies the question text.
        model_specific_prompt_kwargs: Optional mapping that may carry
            ``"pre_prompt"`` and ``"post_prompt"`` strings. ``None`` or a
            missing key is treated as an empty string.

    Returns:
        The question with the pre-prompt prepended and the post-prompt appended.
    """
    # Tolerate configs that omit the prompt kwargs (or only one of the two
    # keys) instead of failing with TypeError/KeyError.
    prompt_kwargs = model_specific_prompt_kwargs or {}
    question = doc["question"]
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}{question}{post_prompt}"
24 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/hallusion_bench/__pycache__/evaluate_hb.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/hallusion_bench/__pycache__/evaluate_hb.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/hallusion_bench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/hallusion_bench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/iconqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/iconqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/iconqa/_default_template_docvqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ICON-QA
 2 | dataset_kwargs:
 3 |   token: True
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.doc_to_visual
 6 | doc_to_text: !function utils.doc_to_text
 7 | doc_to_target: "answers"
 8 | # process_results: !function utils.test_process_results
 9 | generation_kwargs:
10 |   max_new_tokens: 32
11 |   temperature: 0
12 |   do_sample: False
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     statement: "Given a set of images and a question, please provide the answer to the question.\n"
17 |     options_statement: "Question: {question}.\nOptions:\n{options}\nPlease answer with the option letter from the given choices directly."
18 |     freeform_statement: "Question: {question}.\nPlease answer the question using a single word or phrase."
19 | metric_list:
20 |   - metric: anls
21 |     aggregation: mean
22 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/iconqa/iconqa.yaml:
--------------------------------------------------------------------------------
1 | group: iconqa
2 | task:
3 | - iconqa_val
4 | - iconqa_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/iconqa/iconqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_test"
2 | test_split: test
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/iconqa/iconqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "iconqa_val"
2 | test_split: val
3 | include: _default_template_docvqa_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/infovqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/_default_template_infovqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/DocVQA
 2 | dataset_name: InfographicVQA
 3 | dataset_kwargs:
 4 |   token: True
 5 | doc_to_target: "answers"
 6 | doc_to_visual: !function utils.infovqa_doc_to_visual
 7 | doc_to_text: !function utils.infovqa_doc_to_text
 8 | generation_kwargs:
 9 |   max_new_tokens: 32
10 |   temperature: 0
11 |   do_sample: False
12 | model_specific_prompt_kwargs:
13 |   default:
14 |     pre_prompt: ""
15 |     post_prompt: "\nAnswer the question using a single word or phrase."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/infovqa.yaml:
--------------------------------------------------------------------------------
1 | group: infovqa
2 | task:
3 | - infovqa_val
4 | - infovqa_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/infovqa_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "infovqa_test"
 2 | test_split: test
 3 | output_type: generate_until
 4 | process_results: !function utils.infovqa_test_process_results
 5 | metric_list:
 6 |   - metric: submission
 7 |     aggregation: !function utils.infovqa_test_aggregate_results
 8 |     higher_is_better: true
 9 | include: _default_template_infovqa_yaml
10 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/infovqa_val.yaml:
--------------------------------------------------------------------------------
1 | task: "infovqa_val"
2 | test_split: validation
3 | output_type: generate_until
4 | metric_list:
5 |   - metric: anls
6 |     aggregation: mean
7 |     higher_is_better: true
8 | include: _default_template_infovqa_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/infovqa/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import logging
 4 | 
 5 | 
 6 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
 7 | 
 8 | lmms_logger = logging.getLogger("lmms-eval")
 9 | 
10 | 
def infovqa_doc_to_visual(doc):
    """Return the record's image, converted to RGB, wrapped in a list.

    Args:
        doc: Dataset record; ``doc["image"]`` must expose ``convert(mode)``.

    Returns:
        A one-element list holding ``doc["image"].convert("RGB")``.
    """
    rgb_image = doc["image"].convert("RGB")
    return [rgb_image]
13 | 
14 | 
def infovqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the prompt text for one record.

    Args:
        doc: Dataset record; ``doc["question"]`` supplies the question text.
        model_specific_prompt_kwargs: Optional mapping that may carry
            ``"pre_prompt"`` and ``"post_prompt"`` strings. ``None`` or a
            missing key is treated as an empty string.

    Returns:
        The question with the pre-prompt prepended and the post-prompt appended.
    """
    # Tolerate configs that omit the prompt kwargs (or only one of the two
    # keys) instead of failing with TypeError/KeyError.
    prompt_kwargs = model_specific_prompt_kwargs or {}
    question = doc["question"]
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}{question}{post_prompt}"
20 | 
21 | 
def infovqa_test_process_results(doc, results):
    """Package the first prediction for a record under the ``"submission"`` key.

    Args:
        doc: Dataset record; ``doc["questionId"]`` is coerced with ``int``.
        results: Sequence of model outputs; only ``results[0]`` is used.

    Returns:
        ``{"submission": {"questionId": <int>, "answer": <prediction>}}``.
    """
    prediction = results[0]
    entry = {"questionId": int(doc["questionId"]), "answer": prediction}
    return {"submission": entry}
26 | 
27 | 
def infovqa_test_aggregate_results(results, args):
    """Dump the collected results to a JSON submission file and log its path.

    Args:
        results: JSON-serialisable collection of per-record entries.
        args: Forwarded unchanged to ``generate_submission_file``.

    Returns:
        None.
    """
    submission_path = generate_submission_file("infovqa_test_for_submission.json", args)
    with open(submission_path, "w") as out_file:
        json.dump(results, out_file)
    lmms_logger.info(f"Results saved to {submission_path}")
34 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/d170_cn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/d170_cn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/d170_en_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/d170_en_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/dc100_en_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/dc100_en_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/dc200_cn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/dc200_cn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/internal_eval/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/_default_template_internal_eval_yaml:
--------------------------------------------------------------------------------
1 | model_specific_prompt_kwargs:
2 |   default:
3 |     pre_prompt: ""
4 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/d170_cn.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/D170_v4.1_CN
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "d170_cn"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_text: !function utils.doc_to_text # Such that {{prompt}} will be replaced by doc["question"]
 8 | doc_to_visual: !function d170_cn_utils.doc_to_visual
 9 | doc_to_target: "{{annotation}}"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function d170_cn_utils.process_results # apply gpt eval here
19 | metric_list:
20 |   - metric: gpt_eval_info
21 |     aggregation: !function d170_cn_utils.d170_cn_aggregate_info
22 |     higher_is_better: false
23 |   - metric: gpt_eval_avg_score
24 |     aggregation: !function d170_cn_utils.d170_cn_aggregate_avg_score
25 |     higher_is_better: true
26 |   - metric: gpt_eval_score2_rate
27 |     aggregation: !function d170_cn_utils.d170_cn_aggregate_score2_rate
28 |     higher_is_better: true
29 | metadata:
30 |   version: 0.0
31 |   gpt_eval_model_name: "gpt-4-1106-preview"
32 | include: _default_template_internal_eval_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/d170_en.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/D170_v4.1_EN
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "d170_en"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function d170_en_utils.doc_to_visual
 8 | doc_to_text: !function utils.doc_to_text # Such that {{prompt}} will be replaced by doc["question"]
 9 | doc_to_target: "{{annotation}}"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function d170_en_utils.process_results # apply gpt eval here
19 | metric_list:
20 |   - metric: gpt_eval_info
21 |     aggregation: !function d170_en_utils.d170_en_aggregate_info
22 |     higher_is_better: false
23 |   - metric: gpt_eval_avg_score
24 |     aggregation: !function d170_en_utils.d170_en_aggregate_avg_score
25 |     higher_is_better: true
26 |   - metric: gpt_eval_score2_rate
27 |     aggregation: !function d170_en_utils.d170_en_aggregate_score2_rate
28 |     higher_is_better: true
29 | metadata:
30 |   version: 0.0
31 |   gpt_eval_model_name: "gpt-4-1106-preview"
32 | include: _default_template_internal_eval_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/dc100_en.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/DC100_EN
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "dc100_en"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function dc100_en_utils.doc_to_visual
 8 | doc_to_text: !function utils.doc_to_text # Such that {{prompt}} will be replaced by doc["question"]
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function dc100_en_utils.process_results # apply gpt eval here
19 | metric_list:
20 |   - metric: gpt_eval_info
21 |     aggregation: !function dc100_en_utils.dc100_en_aggregate_info
22 |     higher_is_better: false
23 |   - metric: gpt_eval_avg_score
24 |     aggregation: !function dc100_en_utils.dc100_en_aggregate_avg_score
25 |     higher_is_better: true
26 | metadata:
27 |   version: 0.0
28 |   gpt_eval_model_name: "gpt-4-vision-preview"
29 | include: _default_template_internal_eval_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/dc200_cn.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/DC200_CN
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "dc200_cn"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function dc200_cn_utils.doc_to_visual
 8 | doc_to_text: !function utils.doc_to_text # Such that {{prompt}} will be replaced by doc["question"]
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function dc200_cn_utils.process_results # apply gpt eval here
19 | metric_list:
20 |   - metric: gpt_eval_info
21 |     aggregation: !function dc200_cn_utils.dc200_cn_aggregate_info
22 |     higher_is_better: false
23 |   - metric: gpt_eval_avg_score
24 |     aggregation: !function dc200_cn_utils.dc200_cn_aggregate_avg_score
25 |     higher_is_better: true
26 | metadata:
27 |   version: 0.0
28 |   gpt_eval_model_name: "gpt-4-vision-preview"
29 | include: _default_template_internal_eval_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/internal_eval.yaml:
--------------------------------------------------------------------------------
1 | group: internal_eval
2 | task:
3 | - d170_cn
4 | - d170_en
5 | - dc100_en
6 | - dc200_cn
7 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/internal_eval/utils.py:
--------------------------------------------------------------------------------
1 | def doc_to_text(doc, model_specific_prompt_kwargs=None):  # Build the prompt text for one doc from its "question" field.
2 |     if model_specific_prompt_kwargs is None:  # No prompt kwargs supplied: return the bare question.
3 |         return doc["question"]
4 |     question = doc["question"]
5 |     pre_prompt = model_specific_prompt_kwargs.get("pre_prompt", "")  # A missing key falls back to "".
6 |     post_prompt = model_specific_prompt_kwargs.get("post_prompt", "")  # A missing key falls back to "".
7 |     return f"{pre_prompt}{question}{post_prompt}"  # Question wrapped by the optional pre/post prompts.
8 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava-bench-coco/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/llava-bench-coco/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/llava-bench-coco
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "llava_bench_coco"
 5 | test_split: train
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.llava_doc_to_visual
 8 | doc_to_text: !function utils.llava_doc_to_text
 9 | doc_to_target: "gpt_answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   image_aspect_ratio: original
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 1.0
17 |   num_beams: 1
18 | process_results: !function utils.llava_process_results
19 | metric_list:
20 |   - metric: gpt_eval_llava_all
21 |     aggregation: !function utils.llava_all_aggregation
22 |     higher_is_better: true
23 |   - metric: gpt_eval_llava_conv
24 |     aggregation: !function utils.llava_conv_aggregation
25 |     higher_is_better: true
26 |   - metric: gpt_eval_llava_detail
27 |     aggregation: !function utils.llava_detail_aggregation
28 |     higher_is_better: true
29 |   - metric: gpt_eval_llava_complex
30 |     aggregation: !function utils.llava_complex_aggregation
31 |     higher_is_better: true
32 | metadata:
33 |   version: 0.0
34 |   gpt_eval_model_name: "gpt-4-0314"
35 | model_specific_prompt_kwargs:
36 |   default:
37 |     pre_prompt: ""
38 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava-in-the-wild/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/llava-in-the-wild/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava_wilder/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/llava_wilder/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava_wilder/_default_template_wilder_yaml:
--------------------------------------------------------------------------------
 1 | output_type: generate_until
 2 | doc_to_visual: !function utils.llava_doc_to_visual
 3 | doc_to_text: !function utils.llava_doc_to_text
 4 | doc_to_target: "gpt4v_answer"
 5 | generation_kwargs:
 6 |   until:
 7 |     - "ASSISTANT:"
 8 |   image_aspect_ratio: original
 9 |   max_new_tokens: 4096
10 |   temperature: 0
11 |   top_p: 1.0
12 |   num_beams: 1
13 |   do_sample: false
14 | process_results: !function utils.llava_process_results
15 | metric_list:
16 |   - metric: gpt_eval_llava_all
17 |     aggregation: !function utils.llava_all_aggregation
18 |     higher_is_better: true
19 | metadata:
20 |   version: 0.0
21 |   api_type : openai
22 |   gpt_eval_model_name: "gpt-4-vision-preview"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava_wilder/llava_wilder_full.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/llava-wilder
 2 | dataset_name: Full
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "llava_wilder_full"
 6 | test_split: test
 7 | model_specific_prompt_kwargs:
 8 |   default:
 9 |     pre_prompt: ""
10 |     post_prompt: ""
11 |   xcomposer2_4khd:
12 |     pre_prompt: "[UNUSED_TOKEN_146]user\nQuestion: "
13 |     post_prompt: "[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
14 | include: _default_template_wilder_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava_wilder/llava_wilder_medium.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/llava-wilder
 2 | dataset_name: Medium
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "llava_wilder_medium"
 6 | test_split: test
 7 | model_specific_prompt_kwargs:
 8 |   default:
 9 |     pre_prompt: ""
10 |     post_prompt: ""
11 |   xcomposer2_4khd:
12 |     pre_prompt: "[UNUSED_TOKEN_146]user\nQuestion: "
13 |     post_prompt: "[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
14 | include: _default_template_wilder_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/llava_wilder/llava_wilder_small.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/llava-wilder
 2 | dataset_name: Small
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "llava_wilder_small"
 6 | test_split: train 
 7 | model_specific_prompt_kwargs:
 8 |   default:
 9 |     pre_prompt: ""
10 |     post_prompt: ""
11 |   xcomposer2_4khd:
12 |     pre_prompt: "[UNUSED_TOKEN_146]user\nQuestion: "
13 |     post_prompt: "[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
14 | include: _default_template_wilder_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/longvideobench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/longvideobench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/longvideobench/longvideobench_test_v.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: longvideobench/LongVideoBench
 2 | dataset_name: "Default"
 3 | dataset_kwargs:
 4 |   token: True
 5 |   cache_dir: longvideobench
 6 |   video: True
 7 | task: longvideobench_test_v
 8 | test_split: test
 9 | doc_to_visual: !function utils.longvideobench_doc_to_visual_v
10 | doc_to_text: !function utils.longvideobench_doc_to_text
11 | doc_to_target: "correct_choice"
12 | generation_kwargs:
13 |   max_new_tokens: 32
14 |   temperature: 0
15 |   do_sample: False
16 | process_results: !function utils.longvideobench_process_results
17 | metric_list:
18 |   - metric: lvb_acc
19 |     aggregation: !function utils.longvideobench_aggregate_results
20 |     higher_is_better: true
21 | 
22 | model_specific_prompt_kwargs:
23 |   default:
24 |     pre_prompt: ""
25 |     post_prompt: "Answer with the option's letter from the given choices directly.\n"
26 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: longvideobench/LongVideoBench
 2 | dataset_name: "Default"
 3 | dataset_kwargs:
 4 |   token: True
 5 |   cache_dir: longvideobench
 6 |   video: True
 7 | task: longvideobench_val_v
 8 | test_split: validation
 9 | doc_to_visual: !function utils.longvideobench_doc_to_visual_v
10 | doc_to_text: !function utils.longvideobench_doc_to_text
11 | doc_to_target: "correct_choice"
12 | generation_kwargs:
13 |   max_new_tokens: 32
14 |   temperature: 0
15 |   do_sample: False
16 | process_results: !function utils.longvideobench_process_results
17 | metric_list:
18 |   - metric: lvb_acc
19 |     aggregation: !function utils.longvideobench_aggregate_results
20 |     higher_is_better: true
21 | 
22 | model_specific_prompt_kwargs:
23 |   default:
24 |     pre_prompt: ""
25 |     post_prompt: "Answer with the option's letter from the given choices directly.\n"
26 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/__pycache__/mathverse_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mathverse/__pycache__/mathverse_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mathverse/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse.yaml:
--------------------------------------------------------------------------------
 1 | group: mathverse
 2 | task:
 3 |   - mathverse_testmini
 4 |   - mathverse_testmini_text_only
 5 |   - mathverse_testmini_text_lite
 6 |   - mathverse_testmini_text_dominant
 7 |   - mathverse_testmini_vision_intensive
 8 |   - mathverse_testmini_vision_dominant
 9 |   - mathverse_testmini_vision_only
10 | metadata:
11 |   version: 0.0
12 |   gpt_eval_model_name: "gpt-3.5-turbo"
13 |   trunk_response: 30
14 |   quick_match: false


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini"
 6 | test_split: testmini
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 |   
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_text_dominant.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_version_split
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_text_dominant"
 6 | test_split: text_dominant
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 | 
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_text_lite.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_version_split
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_text_lite"
 6 | test_split: text_lite
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 | 
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_text_only.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_text_only
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_text_only"
 6 | test_split: text_only
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 |   
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_vision_dominant.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_version_split
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_vision_dominant"
 6 | test_split: vision_dominant
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 |   
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_vision_intensive.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_version_split
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_vision_intensive"
 6 | test_split: vision_intensive
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 |   
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathverse/mathverse_testmini_vision_only.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: CaraJ/MathVerse-lmmseval
 2 | dataset_name: testmini_version_split
 3 | dataset_kwargs:
 4 |   token: False
 5 | task: "mathverse_testmini_vision_only"
 6 | test_split: vision_only
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.mathverse_doc_to_visual
 9 | doc_to_text: !function utils.mathverse_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | process_results: !function utils.mathverse_process_results
20 | metric_list:
21 |   - metric: gpt_eval_score
22 |     aggregation: !function utils.mathverse_aggregate_results_eval
23 |     higher_is_better: true
24 |   - metric: submission
25 |     aggregation: !function utils.mathverse_aggregate_results_submission
26 |     higher_is_better: true
27 |   
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     shot_type: "format-prompt" # can also be "custom-prompt"
31 |     query_type: "query_wo" # currently only query_wo is supported
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathvista/__pycache__/mathvista_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mathvista/__pycache__/mathvista_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathvista/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mathvista/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathvista/mathvista.yaml:
--------------------------------------------------------------------------------
1 | group: mathvista
2 | task:
3 |   - mathvista_testmini
4 |   - mathvista_test
5 | metadata:
6 |   version: 0.0
7 |   gpt_eval_model_name: "gpt-4-0613"
8 |   quick_extract: false


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathvista/mathvista_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: AI4Math/MathVista
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "mathvista_test"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.mathvista_doc_to_visual
 8 | doc_to_text: !function utils.mathvista_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function utils.mathvista_process_results
19 | metric_list:
20 |   - metric: submission
21 |     aggregation: !function utils.mathvista_aggregate_results
22 |     higher_is_better: true
23 | 
24 | model_specific_prompt_kwargs:
25 |   default:
26 |     shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step"
27 | model_specific_generation_kwargs:
28 |   llava:
29 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mathvista/mathvista_testmini.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: AI4Math/MathVista
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "mathvista_testmini"
 5 | test_split: testmini
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.mathvista_doc_to_visual
 8 | doc_to_text: !function utils.mathvista_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function utils.mathvista_process_results
19 | metric_list:
20 |   - metric: gpt_eval_score
21 |     aggregation: !function utils.mathvista_aggregate_results
22 |     higher_is_better: true
23 | 
24 | model_specific_prompt_kwargs:
25 |   default:
26 |     shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step"
27 |     shot: 0
28 |     use_caption: False
29 |     use_ocr: False
30 |   phi3v:
31 |     shot_type: "solution"
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mlvu/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mlvu/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mlvu/mlvu.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: sy1998/temp
 2 | dataset_name: "Default"
 3 | dataset_kwargs:
 4 |   token: True
 5 |   cache_dir: mlvu
 6 |   video: True
 7 | task: mlvu
 8 | test_split: test
 9 | output_type: generate_until
10 | doc_to_visual: !function utils.mlvu_doc_to_visual
11 | doc_to_text: !function utils.mlvu_doc_to_text
12 | doc_to_target: "answer"
13 | # The return value of process_results will be used by metrics
14 | process_results: !function utils.mlvu_process_results
15 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
16 | metric_list:
17 |   - metric: mlvu_percetion_score
18 |     aggregation: !function utils.mlvu_aggregate_results
19 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/cc_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/cc_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/cn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/cn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/en_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/en_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/mmbench_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmbench/__pycache__/mmbench_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/_default_template_mmbench_cn_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMBench
 2 | dataset_kwargs:
 3 |   token: True
 4 | doc_to_target: "answer"
 5 | dataset_name: "cn"
 6 | output_type: generate_until
 7 | doc_to_visual: !function cn_utils.mmbench_doc_to_visual
 8 | doc_to_text: !function cn_utils.mmbench_doc_to_text
 9 | generation_kwargs:
10 |   max_new_tokens: 256
11 |   temperature: 0
12 |   top_p: 1.0
13 |   num_beams: 1
14 |   do_sample: false
15 | process_results: !function cn_utils.mmbench_process_results
16 | model_specific_prompt_kwargs:
17 |   default:
18 |     pre_prompt: ""
19 |     post_prompt: "\n请直接使用所提供的选项字母作为答案回答。"
20 | model_specific_generation_kwargs:
21 |   llava:
22 |     image_aspect_ratio: original
23 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/_default_template_mmbench_en_yaml:
--------------------------------------------------------------------------------
 1 | # Shared defaults for the English ("en") MMBench configuration; presumably pulled in by the mmbench_en_* task files via `include:` (TODO confirm).
 2 | dataset_path: lmms-lab/MMBench
 3 | dataset_kwargs:
 4 |   token: True
 5 | doc_to_target: "answer"
 6 | model_specific_prompt_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
10 | doc_to_visual: !function en_utils.mmbench_doc_to_visual
11 | doc_to_text: !function en_utils.mmbench_doc_to_text
12 | process_results: !function en_utils.mmbench_process_results
13 | model_specific_generation_kwargs:
14 |   llava:
15 |     image_aspect_ratio: original
16 | output_type: generate_until
17 | dataset_name: "en"
18 | generation_kwargs:
19 |   until:
20 |     - "ASSISTANT:"
21 |   max_new_tokens: 1024
22 |   temperature: 0
23 |   top_p: 1.0
24 |   num_beams: 1
25 |   do_sample: false
26 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench.yaml:
--------------------------------------------------------------------------------
 1 | group: mmbench
 2 | task:
 3 |   - mmbench_en_dev
 4 |   - mmbench_en_test
 5 |   - mmbench_cn_dev
 6 |   - mmbench_cn_test
 7 |   - mmbench_cn_cc
 8 | metadata:
 9 |   version: 0.0
10 |   sys_prompt: "There are several options:"
11 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_cc.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMBench
 2 | dataset_name: cc
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "mmbench_cn_cc"
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function cc_utils.mmbench_doc_to_visual
 9 | doc_to_text: !function cc_utils.mmbench_cn_cc_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 256
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function cc_utils.mmbench_cn_cc_process_results
18 | metric_list:
19 |   - metric: gpt_eval_score
20 |     aggregation: !function cc_utils.mmbench_cn_cc_aggregate_dev_results_eval
21 |     higher_is_better: true
22 |   - metric: submission
23 |     aggregation: !function cc_utils.mmbench_cn_cc_aggregate_results
24 | metadata:
25 |   version: 0.0
26 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
27 | 
28 | model_specific_prompt_kwargs:
29 |   default:
30 |     pre_prompt: ""
31 |     post_prompt: "\n请直接使用所提供的选项字母作为答案回答。"
32 | model_specific_generation_kwargs:
33 |   llava:
34 |     image_aspect_ratio: original


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_cn
2 | task:
3 |   - mmbench_cn_dev
4 |   - mmbench_cn_test
5 |   - mmbench_cn_cc
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 |   sys_prompt: "有如下几个选项："


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn_dev.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmbench_cn_dev"
 2 | test_split: "dev"
 3 | metric_list:
 4 |   - metric: gpt_eval_score
 5 |     aggregation: !function cn_utils.mmbench_aggregate_dev_results_eval
 6 |     higher_is_better: true
 7 |   - metric: submission
 8 |     higher_is_better: true
 9 |     aggregation: !function cn_utils.mmbench_aggregate_dev_results
10 | include: _default_template_mmbench_cn_yaml
11 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml:
--------------------------------------------------------------------------------
1 | task: mmbench_cn_test
2 | test_split: test
3 | metric_list:
4 |   - metric: submission
5 |     aggregation: !function cn_utils.mmbench_aggregate_test_results
6 |     higher_is_better: true
7 | include: _default_template_mmbench_cn_yaml
8 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_en.yaml:
--------------------------------------------------------------------------------
1 | group: mmbench_en
2 | task:
3 |   - mmbench_en_dev
4 |   - mmbench_en_test
5 | metadata:
6 |   version: 0.0
7 |   sys_prompt: "There are several options:"
8 |   gpt_eval_model_name: "gpt-3.5-turbo-0613"
9 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_en_dev.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmbench_en_dev"
 2 | test_split: dev
 3 | include: _default_template_mmbench_en_yaml
 4 | metric_list:
 5 |   - metric: gpt_eval_score
 6 |     aggregation: !function en_utils.mmbench_aggregate_dev_results_eval
 7 |     higher_is_better: true
 8 |   - metric: submission
 9 |     aggregation: !function en_utils.mmbench_aggregate_dev_results_submission
10 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmbench/mmbench_en_test.yaml:
--------------------------------------------------------------------------------
1 | task: "mmbench_en_test"
2 | test_split: test
3 | include: _default_template_mmbench_en_yaml
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function en_utils.mmbench_aggregate_test_results
7 |     higher_is_better: true
8 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mme/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mme/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmmu/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/__pycache__/utils_group_img.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmmu/__pycache__/utils_group_img.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmmu/arial.ttf


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu
2 | task:
3 | - mmmu_val
4 | - mmmu_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu_group_img.yaml:
--------------------------------------------------------------------------------
1 | group: mmmu_group_img
2 | task:
3 | - mmmu_val_group_img
4 | - mmmu_test_group_img
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu_group_img_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMMU
 2 | task: "mmmu_test_group_img"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils_group_img.mmmu_doc_to_visual
 6 | doc_to_text: !function utils_group_img.mmmu_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils_group_img.mmmu_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   image_aspect_ratio: original
14 | metric_list:
15 |   - metric: submission
16 |     aggregation: !function utils_group_img.mmmu_test_aggregate_results_for_submission
17 |     higher_is_better: true
18 | metadata:
19 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu_group_img_val.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMMU
 2 | task: "mmmu_val_group_img"
 3 | test_split: validation
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils_group_img.mmmu_doc_to_visual
 6 | doc_to_text: !function utils_group_img.mmmu_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils_group_img.mmmu_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 | model_specific_generation_kwargs:
14 |   llava:
15 |     image_aspect_ratio: original
16 | metric_list:
17 |   - metric: mmmu_acc
18 |     aggregation: !function utils_group_img.mmmu_aggregate_results
19 |     higher_is_better: true
20 | metadata:
21 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMMU
 2 | task: "mmmu_test"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.mmmu_doc_to_visual
 6 | doc_to_text: !function utils.mmmu_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils.mmmu_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   image_aspect_ratio: original
14 | metric_list:
15 |   - metric: submission
16 |     aggregation: !function utils.mmmu_test_aggregate_results_for_submission
17 |     higher_is_better: true
18 | metadata:
19 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmmu/mmmu_val.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMMU
 2 | task: "mmmu_val"
 3 | test_split: validation
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.mmmu_doc_to_visual
 6 | doc_to_text: !function utils.mmmu_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils.mmmu_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 | model_specific_generation_kwargs:
14 |   llava:
15 |     image_aspect_ratio: original
16 | metric_list:
17 |   - metric: mmmu_acc
18 |     aggregation: !function utils.mmmu_aggregate_results
19 |     higher_is_better: true
20 | metadata:
21 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/__pycache__/mmupd_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmupd/__pycache__/mmupd_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmupd/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/_default_template_mmupd_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: MM-UPD/MM-UPD
 2 | doc_to_target: "answer"
 3 | doc_to_visual: !function utils.mmupd_doc_to_visual
 4 | doc_to_text: !function utils.mmupd_doc_to_text
 5 | doc_to_target: "answer"
 6 | process_results: !function utils.mmupd_process_results
 7 | model_specific_generation_kwargs:
 8 |   llava:
 9 |     image_aspect_ratio: original
10 | output_type: generate_until
11 | generation_kwargs:
12 |   until:
13 |     - "ASSISTANT:"
14 |   max_new_tokens: 1024
15 |   temperature: 0
16 |   top_p: 0
17 |   num_beams: 1
18 |   do_sample: false
19 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmaad_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmaad_base"
 2 | test_split: test
 3 | dataset_name: mmaad_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmaad_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmaad_instruction.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmaad_instruction"
 2 | test_split: test
 3 | dataset_name: mmaad_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmaad_instruction
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmaad_option.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmaad_option"
 2 | test_split: test
 3 | dataset_name: mmaad_option
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmaad_option
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmiasd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmiasd_base"
 2 | test_split: test
 3 | dataset_name: mmiasd_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmiasd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmiasd_instruction.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmiasd_instruction"
 2 | test_split: test
 3 | dataset_name: mmiasd_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmiasd_instruction
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmiasd_option.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmiasd_option"
 2 | test_split: test
 3 | dataset_name: mmiasd_option
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmiasd_option
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmivqd_base.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmivqd_base"
 2 | test_split: test
 3 | dataset_name: mmivqd_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\n"
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmivqd_base
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmivqd_instruction.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmivqd_instruction"
 2 | test_split: test
 3 | dataset_name: mmivqd_base
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nIf the given image is irrelevant to the question, answer \"F. The image and question are irrelevant.\"."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmivqd_instruction
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmivqd_option.yaml:
--------------------------------------------------------------------------------
 1 | task: "mmivqd_option"
 2 | test_split: test
 3 | dataset_name: mmivqd_option
 4 | model_specific_prompt_kwargs:
 5 |   default:
 6 |     pre_prompt: ""
 7 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
 8 | include: _default_template_mmupd_yaml
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.mmivqd_option
12 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmupd.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd
 2 | task:
 3 |   - mmaad_base
 4 |   - mmaad_option
 5 |   - mmaad_instruction
 6 |   - mmiasd_base
 7 |   - mmiasd_option
 8 |   - mmiasd_instruction
 9 |   - mmivqd_base
10 |   - mmivqd_option
11 |   - mmivqd_instruction
12 | metadata:
13 |   version: 0.0
14 |   sys_prompt: ""
15 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmupd_base.yaml:
--------------------------------------------------------------------------------
 1 | group: mmupd_base
 2 | task:
 3 |   - mmaad_base
 4 |   - mmiasd_base
 5 |   - mmivqd_base
 6 | metadata:
 7 |   version: 0.0
 8 |   sys_prompt: ""
 9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"
10 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmupd_instruction.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_instruction
2 | task:
3 |   - mmaad_instruction
4 |   - mmiasd_instruction
5 |   - mmivqd_instruction
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmupd/mmupd_option.yaml:
--------------------------------------------------------------------------------
1 | group: mmupd_option
2 | task:
3 |   - mmaad_option
4 |   - mmiasd_option
5 |   - mmivqd_option
6 | metadata:
7 |   version: 0.0
8 |   sys_prompt: ""
9 |   gpt_eval_model_name: "gpt-3.5-turbo-0125"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmvet/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mmvet/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mmvet/mmvet.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MMVet
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "mmvet"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.mmvet_doc_to_visual
 8 | doc_to_text: !function utils.doc_to_text # Such that {{question}} will be replaced by doc["question"]
 9 | doc_to_target: "{{answer}}"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 32768
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function utils.mmvet_process_results # apply gpt eval here
19 | metric_list:
20 |   - metric: gpt_eval_score
21 |     aggregation: !function utils.mmvet_aggregate_results
22 |     higher_is_better: true
23 | metadata:
24 |   version: 0.0
25 |   gpt_eval_model_name: "gpt-4-0613"
26 | model_specific_prompt_kwargs:
27 |   default:
28 |     pre_prompt: "Please think step by step and try to provide best answer to the following question: \n\n"
29 |     post_prompt: ""
30 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multidocvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/multidocvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multidocvqa/multidocvqa.yaml:
--------------------------------------------------------------------------------
1 | group: multidocvqa
2 | task:
3 | - multidocvqa_val
4 | - multidocvqa_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multidocvqa/multidocvqa_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MP-DocVQA
 2 | task: "multidocvqa_test"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual
 6 | doc_to_text: !function utils.multidocvqa_doc_to_text
 7 | doc_to_target: "answers"
 8 | generation_kwargs:
 9 |   max_new_tokens: 32
10 |   temperature: 0
11 |   do_sample: False
12 | process_results: !function utils.multidocvqa_process_test_results_for_submission
13 | metric_list:
14 |   - metric: submission
15 |     aggregation: !function utils.multidocvqa_test_aggregate_results_for_submission
16 | model_specific_prompt_kwargs:
17 |   default:
18 |     pre_prompt: ""
19 |     post_prompt: "\nAnswer the question using a single word or phrase."
20 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multidocvqa/multidocvqa_val.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/MP-DocVQA
 2 | task: "multidocvqa_val"
 3 | test_split: val
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual
 6 | doc_to_text: !function utils.multidocvqa_doc_to_text
 7 | doc_to_target: "answers"
 8 | generation_kwargs:
 9 |   max_new_tokens: 32
10 |   temperature: 0
11 |   do_sample: False
12 | process_results: !function utils.multidocvqa_process_results
13 | metric_list:
14 |   - metric: anls
15 |     aggregation: !function utils.multidocvqa_aggregate_results_anls
16 |     higher_is_better: true
17 |   - metric: accuracy
18 |     aggregation: !function utils.multidocvqa_aggregate_results_accuracy
19 |     higher_is_better: true
20 | model_specific_prompt_kwargs:
21 |   default:
22 |     pre_prompt: ""
23 |     post_prompt: "\nAnswer the question using a single word or phrase."
24 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/_default_template.yaml:
--------------------------------------------------------------------------------
 1 | test_split: train
 2 | output_type: generate_until
 3 | doc_to_visual: !function utils.llava_doc_to_visual
 4 | doc_to_text: !function utils.llava_doc_to_text
 5 | doc_to_target: "gpt_answer"
 6 | generation_kwargs:
 7 |   until:
 8 |     - "ASSISTANT:"
 9 |   image_aspect_ratio: original
10 |   max_new_tokens: 1024
11 |   temperature: 0
12 |   top_p: 0
13 |   num_beams: 1
14 |   do_sample: false
15 | process_results: !function utils.llava_process_results
16 | metric_list:
17 |   - metric: gpt_eval_llava_all
18 |     aggregation: !function utils.llava_all_aggregation
19 |     higher_is_better: true
20 |   - metric: gpt_eval_llava_conv
21 |     aggregation: !function utils.llava_conv_aggregation
22 |     higher_is_better: true
23 |   - metric: gpt_eval_llava_detail
24 |     aggregation: !function utils.llava_detail_aggregation
25 |     higher_is_better: true
26 |   - metric: gpt_eval_llava_complex
27 |     aggregation: !function utils.llava_complex_aggregation
28 |     higher_is_better: true
29 | metadata:
30 |   version: 0.0
31 |   gpt_eval_model_name: "gpt-4-0613"
32 | model_specific_prompt_kwargs:
33 |   default:
34 |     pre_prompt: ""
35 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: arabic
4 |     token: True
5 | task: "llava_in_the_wild_arabic"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: bengali
4 |     token: True
5 | task: "llava_in_the_wild_bengali"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: chinese
4 |     token: True
5 | task: "llava_in_the_wild_chinese"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: french
4 |     token: True
5 | task: "llava_in_the_wild_french"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: hindi
4 |     token: True
5 | task: "llava_in_the_wild_hindi"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: japanese
4 |     token: True
5 | task: "llava_in_the_wild_japanese"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: russian
4 |     token: True
5 | task: "llava_in_the_wild_russian"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |   config: spanish
4 |   token: True
5 | task: "llava_in_the_wild_spanish"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml:
--------------------------------------------------------------------------------
1 | dataset_path: "gagan3012/multilingual-llava-bench"
2 | dataset_kwargs:
3 |     config: urdu
4 |     token: True
5 | task: "llava_in_the_wild_urdu"
6 | include: _default_template.yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/mvbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/_default_template.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: OpenGVLab/MVBench
 2 | dataset_kwargs:
 3 |   token: True
 4 |   cache_dir: mvbench
 5 |   video: True
 6 | generation_kwargs:
 7 |   max_new_tokens: 16
 8 |   temperature: 0
 9 |   top_p: 1.0
10 |   num_beams: 1
11 |   do_sample: false
12 | 
13 | output_type: generate_until
14 | doc_to_visual: !function utils.mvbench_doc_to_visual
15 | doc_to_text: !function utils.mvbench_doc_to_text
16 | doc_to_target: "answer"
17 | # The return value of process_results will be used by metrics
18 | process_results: !function utils.mvbench_process_results
19 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
20 | metric_list:
21 |   - metric: mvbench_accuracy
22 |     aggregation: !function utils.mvbench_aggregate_results
23 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench.yaml:
--------------------------------------------------------------------------------
 1 | group: mvbench
 2 | task:
 3 |   - mvbench_action_sequence
 4 |   - mvbench_moving_count
 5 |   - mvbench_action_prediction
 6 |   - mvbench_episodic_reasoning
 7 |   - mvbench_action_antonym
 8 |   - mvbench_action_count
 9 |   - mvbench_scene_transition
10 |   - mvbench_object_shuffle
11 |   - mvbench_object_existence
12 |   - mvbench_fine_grained_pose
13 |   - mvbench_unexpected_action
14 |   - mvbench_moving_direction
15 |   - mvbench_state_change
16 |   - mvbench_object_interaction
17 |   - mvbench_character_order
18 |   - mvbench_action_localization
19 |   - mvbench_counterfactual_inference
20 |   - mvbench_fine_grained_action
21 |   - mvbench_moving_attribute
22 |   - mvbench_egocentric_navigation
23 | 
24 | # ['action_sequence', 'moving_count', 'action_prediction', 'episodic_reasoning', 'action_antonym', 'action_count', 'scene_transition', 'object_shuffle', 'object_existence', 'fine_grained_pose', 'unexpected_action', 'moving_direction', 'state_change', 'object_interaction', 'character_order', 'action_localization', 'counterfactual_inference', 'fine_grained_action', 'moving_attribute', 'egocentric_navigation']


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_action_antonym
3 | dataset_name: action_antonym
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_antonym
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_action_count
3 | dataset_name: action_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_count
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_localization.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_action_localization
3 | dataset_name: action_localization
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_localization
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_action_prediction
3 | dataset_name: action_prediction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_prediction
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_action_sequence
3 | dataset_name: action_sequence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: action_sequence
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_character_order.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_character_order
3 | dataset_name: character_order
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: character_order
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_counterfactual_inference
3 | dataset_name: counterfactual_inference
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: counterfactual_inference
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_egocentric_navigation
3 | dataset_name: egocentric_navigation
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: egocentric_navigation
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_episodic_reasoning.yaml:
--------------------------------------------------------------------------------
 1 | include: _default_template.yaml
 2 | task: mvbench_episodic_reasoning
 3 | dataset_name: episodic_reasoning
 4 | test_split: train
 5 | doc_to_visual: !function utils.mvbench_frames_doc_to_visual
 6 | generation_kwargs:
 7 |   max_new_tokens: 16
 8 |   temperature: 0
 9 |   top_p: 1.0
10 |   num_beams: 1
11 |   do_sample: false
12 |   
13 | lmms_eval_specific_kwargs:
14 |   default:
15 |     sub_task: episodic_reasoning
16 |     post_prompt: "Answer with the option's letter from the given choices directly."
17 | 
18 | metadata:
19 |   task_type: video
20 |   sample_frames: 32 # the episodic_reasoning videos are stored as frame images in folders, so the number of frames to sample has to be set here.


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_fine_grained_action
3 | dataset_name: fine_grained_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_action
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_fine_grained_pose
3 | dataset_name: fine_grained_pose
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: fine_grained_pose
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_moving_attribute
3 | dataset_name: moving_attribute
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_attribute
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_count.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_moving_count
3 | dataset_name: moving_count
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_count
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_moving_direction
3 | dataset_name: moving_direction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: moving_direction
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_existence.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_object_existence
3 | dataset_name: object_existence
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_existence
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_object_interaction
3 | dataset_name: object_interaction
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_interaction
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_object_shuffle
3 | dataset_name: object_shuffle
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: object_shuffle
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_scene_transition
3 | dataset_name: scene_transition
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: scene_transition
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_state_change.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_state_change
3 | dataset_name: state_change
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: state_change
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml:
--------------------------------------------------------------------------------
1 | include: _default_template.yaml
2 | task: mvbench_unexpected_action
3 | dataset_name: unexpected_action
4 | test_split: train
5 | lmms_eval_specific_kwargs:
6 |   default:
7 |     sub_task: unexpected_action
8 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/nextqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/NExTQA
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: nextqa
6 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/nextqa.yaml:
--------------------------------------------------------------------------------
1 | group: nextqa
2 | task:
3 | - nextqa_oe_test
4 | - nextqa_oe_val
5 | - nextqa_mc_test
6 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/nextqa_mc_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "nextqa_mc_test"
 2 | dataset_name: MC
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.nextqa_doc_to_visual
 6 | doc_to_text: !function utils.nextqa_doc_to_text_mc
 7 | doc_to_target: !function utils.nextqa_doc_to_target
 8 | process_results: !function utils.nextqa_mc_process_results
 9 | metric_list:
10 |   - metric: exact_match
11 |     aggregation: mean
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | model_specific_prompt_kwargs:
15 |   default:
16 |     pre_prompt: ""
17 |     post_prompt: "\nAnswer the question with A, B, C, D, or E."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/nextqa_oe_test.yaml:
--------------------------------------------------------------------------------
 1 | task: "nextqa_oe_test"
 2 | dataset_name: OE
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.nextqa_doc_to_visual
 6 | doc_to_text: !function utils.nextqa_doc_to_text
 7 | doc_to_target: !function utils.nextqa_doc_to_target
 8 | process_results: !function utils.nextqa_process_results
 9 | metric_list:
10 |   - metric: WUPS
11 |     aggregation: !function utils.nextqa_aggregate_results
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | model_specific_prompt_kwargs:
15 |   default:
16 |     pre_prompt: ""
17 |     post_prompt: "\nAnswer a question using a short phrase or sentence."
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nextqa/nextqa_oe_val.yaml:
--------------------------------------------------------------------------------
 1 | task: "nextqa_oe_val"
 2 | dataset_name: OE
 3 | test_split: validation
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.nextqa_doc_to_visual
 6 | doc_to_text: !function utils.nextqa_doc_to_text
 7 | doc_to_target: !function utils.nextqa_doc_to_target
 8 | process_results: !function utils.nextqa_process_results
 9 | metric_list:
10 |   - metric: WUPS
11 |     aggregation: !function utils.nextqa_aggregate_results
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | model_specific_prompt_kwargs:
15 |   default:
16 |     pre_prompt: ""
17 |     post_prompt: "\nAnswer a question using a short phrase or sentence."
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nocaps/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/nocaps/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml:
--------------------------------------------------------------------------------
1 | model_specific_prompt_kwargs:
2 |   default:
3 |     prompt: "Provide a one-sentence caption for the provided image."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nocaps/nocaps.yaml:
--------------------------------------------------------------------------------
1 | group : nocaps
2 | task:
3 |   - nocaps_test
4 |   - nocaps_val


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/nocaps/nocaps_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/NoCaps
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "nocaps_test"
 5 | group : "nocaps_caption"
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.nocaps_doc_to_visual
 9 | doc_to_text: !function utils.nocaps_doc_to_text
10 | doc_to_target: "annotations_captions"
11 | generation_kwargs:
12 |   max_new_tokens: 64
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.nocaps_test_process_result
18 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
19 | metric_list:
20 |   - metric: nocaps_passthrough 
21 |     aggregation : !function utils.nocaps_test_aggregation_result
22 |     higher_is_better : true
23 | metadata:
24 |   - version: 0.0
25 | include: _default_template_nocaps_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ocrbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/ocrbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ocrbench/ocrbench.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: echo840/OCRBench
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "ocrbench"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.ocrbench_doc_to_visual
 8 | doc_to_text: !function utils.ocrbench_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   max_new_tokens: 128
12 |   temperature: 0
13 |   top_p: 1.0
14 |   num_beams: 1
15 |   do_sample: false
16 | process_results: !function utils.ocrbench_process_results
17 | metric_list:
18 |   - metric: ocrbench_accuracy
19 |     aggregation: !function utils.ocrbench_aggregate_accuracy
20 |     higher_is_better: true
21 | metadata:
22 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ok_vqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/ok_vqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ok_vqa/_default_template_vqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/OK-VQA
 2 | output_type: generate_until
 3 | doc_to_visual: !function utils.ok_vqa_doc_to_visual
 4 | doc_to_text: !function utils.ok_vqa_doc_to_text
 5 | doc_to_target: "answer"
 6 | generation_kwargs:
 7 |   until:
 8 |     - "ASSISTANT:"
 9 | metric_list:
10 |   - metric: exact_match
11 |     aggregation: mean
12 |     higher_is_better: true
13 |     ignore_case: true
14 |     ignore_punctuation: true
15 |   - metric: submission
16 |     aggregation: !function utils.ok_vqa_aggregate_submissions
17 |     higher_is_better: true
18 | process_results: !function utils.ok_vqa_process_results
19 | model_specific_prompt_kwargs:
20 |   default:
21 |     pre_prompt: ""
22 |     post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase."
23 | metadata:
24 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ok_vqa/_generate_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | splits = ["val2014"]
 5 | tasks = ["vqa"]
 6 | 
 7 | if __name__ == "__main__":
 8 |     dump_tasks = []
 9 |     for task in tasks:
10 |         for split in splits:
11 |             yaml_dict = {"group": f"ok_vqa", "task": f"ok_vqa_{split}", "include": f"_default_template_{task}_yaml", "test_split": split}
12 |             if split == "train":
13 |                 yaml_dict.pop("group")
14 |             else:
15 |                 dump_tasks.append(f"ok_vqa_{split}")
16 | 
17 |             save_path = f"./ok_vqa_{split}.yaml"
18 |             print(f"Saving to {save_path}")
19 |             with open(save_path, "w") as f:
20 |                 yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
21 | 
22 |     group_dict = {"group": "ok_vqa", "task": dump_tasks}
23 | 
24 |     with open("./_ok_vqa.yaml", "w") as f:
25 |         yaml.dump(group_dict, f, default_flow_style=False, indent=4)
26 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: ok_vqa
2 | task:
3 | - ok_vqa_val2014


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml:
--------------------------------------------------------------------------------
1 | group: ok_vqa
2 | task: ok_vqa_val2014
3 | test_split: val2014
4 | include: _default_template_vqa_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/cn_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/cn_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/en_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/en_utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/olympiadbench_evals.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/olympiadbench/__pycache__/olympiadbench_evals.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/olympiadbench.yaml:
--------------------------------------------------------------------------------
1 | group: olympiadbench
2 | task:
3 | - olympiadbench_test_en
4 | - olympiadbench_test_cn
5 | metadata:
6 |   - version: 0.0
7 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/olympiadbench_test_cn.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/OlympiadBench
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "olympiadbench_test_cn"
 5 | test_split: test_cn
 6 | output_type: generate_until
 7 | doc_to_visual: !function cn_utils.olympiadbench_doc_to_visual
 8 | doc_to_text: !function cn_utils.olympiadbench_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function cn_utils.olympiadbench_process_results
19 | metric_list:
20 |   - metric: submission
21 |     aggregation: !function cn_utils.olympiadbench_aggregate_results
22 |     higher_is_better: true
23 |   - metric: exact_match
24 |     aggregation: mean
25 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/olympiadbench/olympiadbench_test_en.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/OlympiadBench
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "olympiadbench_test_en"
 5 | test_split: test_en
 6 | output_type: generate_until
 7 | doc_to_visual: !function en_utils.olympiadbench_doc_to_visual
 8 | doc_to_text: !function en_utils.olympiadbench_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   max_new_tokens: 1024
14 |   temperature: 0
15 |   top_p: 1.0
16 |   num_beams: 1
17 |   do_sample: false
18 | process_results: !function en_utils.olympiadbench_process_results
19 | metric_list:
20 |   - metric: submission
21 |     aggregation: !function en_utils.olympiadbench_aggregate_results
22 |     higher_is_better: true
23 |   - metric: exact_match
24 |     aggregation: mean
25 |     higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/test/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/perceptiontest/test/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/test/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/PerceptionTest
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest
6 | model_specific_prompt_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: ""


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/test/perceptiontest_mc.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "mc_question"
 2 | task: "perceptiontest_test_mc"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.perceptiontest_doc_to_visual
 6 | doc_to_text: !function utils.perceptiontest_doc_to_text
 7 | doc_to_target: !function utils.perceptiontest_doc_to_answer_mc
 8 | process_results: !function utils.perceptiontest_process_results_mc
 9 | metric_list:
10 |   - metric: submission
11 |     aggregation: !function utils.perceptiontest_aggregate_mc
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/test/perceptiontest_mcppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "mc_question"
 2 | task: "perceptiontest_test_mcppl"
 3 | test_split: test
 4 | output_type: multiple_choice
 5 | doc_to_visual: !function utils.perceptiontest_doc_to_visual
 6 | doc_to_text: "question"
 7 | doc_to_target: !function utils.perceptiontest_doc_to_answer_mc
 8 | doc_to_choice: !function utils.perceptiontest_doc_to_choice
 9 | process_results: !function utils.perceptiontest_process_results_mc_ppl
10 | metric_list:
11 |   - metric: submission
12 |     aggregation: !function utils.perceptiontest_aggregate_mc_ppl
13 |     higher_is_better: true
14 | include: _default_template_yaml
15 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/val/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/perceptiontest/val/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/val/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: /mnt/lzy/video_data/save_data/perceptiontest_val
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: perceptiontest_val
6 | model_specific_prompt_kwargs:
7 |   default:
8 |     pre_prompt: ""
9 |     post_prompt: "Answer with the option's letter from the given choices directly."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/val/perceptiontest_mc.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "mc_question_val"
 2 | task: "perceptiontest_val_mc"
 3 | test_split: validation
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.perceptiontest_val_doc_to_visual
 6 | doc_to_text: !function utils.perceptiontest_val_doc_to_text
 7 | doc_to_target: !function utils.perceptiontest_val_doc_to_answer
 8 | process_results: !function utils.perceptiontest_val_process_results_mc
 9 | metric_list:
10 |   - metric: accuracy
11 |     aggregation: !function utils.perceptiontest_val_aggregate_accuracy
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/perceptiontest/val/perceptiontest_mcppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "mc_question_val"
 2 | task: "perceptiontest_val_mcppl"
 3 | test_split: validation
 4 | output_type: multiple_choice
 5 | doc_to_visual: !function utils.perceptiontest_val_doc_to_visual
 6 | doc_to_text: "question"
 7 | doc_to_target: !function utils.perceptiontest_val_doc_to_answer
 8 | doc_to_choice: !function utils.perceptiontest_val_doc_to_choice
 9 | process_results: !function utils.perceptiontest_val_process_results_mc_ppl
10 | metric_list:
11 |   - metric: accuracy
12 |     aggregation: !function utils.perceptiontest_val_aggregate_accuracy
13 |     higher_is_better: true
14 | include: _default_template_yaml
15 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/pope/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/pope.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/POPE
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "pope"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.pope_doc_to_visual
 8 | doc_to_text: !function utils.pope_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   max_new_tokens: 128
12 |   temperature: 0
13 |   top_p: 1.0
14 |   num_beams: 1
15 |   do_sample: false
16 | process_results: !function utils.pope_process_results
17 | metric_list:
18 |   - metric: pope_accuracy
19 |     aggregation: !function utils.pope_aggregate_accuracy
20 |     higher_is_better: true
21 |   - metric: pope_precision
22 |     aggregation: !function utils.pope_aggregate_precision
23 |     higher_is_better: true
24 |   - metric: pope_recall
25 |     aggregation: !function utils.pope_aggregate_recall
26 |     higher_is_better: true
27 |   - metric: pope_f1_score
28 |     aggregation: !function utils.pope_aggregate_f1_score
29 |     higher_is_better: true
30 |   - metric: pope_yes_ratio
31 |     aggregation: !function utils.pope_aggregate_yes_ratio
32 |     higher_is_better: true
33 | metadata:
34 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/pope_adv.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/POPE
 2 | dataset_name: Full
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "pope_adv"
 6 | test_split: adversarial 
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.pope_doc_to_visual
 9 | doc_to_text: !function utils.pope_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 |   temperature: 0
14 |   top_p: 0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.pope_process_results
18 | metric_list:
19 |   - metric: pope_accuracy
20 |     aggregation: !function utils.pope_aggregate_accuracy
21 |     higher_is_better: true
22 |   - metric: pope_precision
23 |     aggregation: !function utils.pope_aggregate_precision
24 |     higher_is_better: true
25 |   - metric: pope_recall
26 |     aggregation: !function utils.pope_aggregate_recall
27 |     higher_is_better: true
28 |   - metric: pope_f1_score
29 |     aggregation: !function utils.pope_aggregate_f1_score
30 |     higher_is_better: true
31 |   - metric: pope_yes_ratio
32 |     aggregation: !function utils.pope_aggregate_yes_ratio
33 |     higher_is_better: true
34 | metadata:
35 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/pope_full.yaml:
--------------------------------------------------------------------------------
1 | group : pope_full
2 | task:
3 |   - pope_adv
4 |   - pope_pop
5 |   - pope_random


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/pope_pop.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/POPE
 2 | dataset_name: Full
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "pope_pop"
 6 | test_split: popular 
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.pope_doc_to_visual
 9 | doc_to_text: !function utils.pope_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 |   temperature: 0
14 |   top_p: 0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.pope_process_results
18 | metric_list:
19 |   - metric: pope_accuracy
20 |     aggregation: !function utils.pope_aggregate_accuracy
21 |     higher_is_better: true
22 |   - metric: pope_precision
23 |     aggregation: !function utils.pope_aggregate_precision
24 |     higher_is_better: true
25 |   - metric: pope_recall
26 |     aggregation: !function utils.pope_aggregate_recall
27 |     higher_is_better: true
28 |   - metric: pope_f1_score
29 |     aggregation: !function utils.pope_aggregate_f1_score
30 |     higher_is_better: true
31 |   - metric: pope_yes_ratio
32 |     aggregation: !function utils.pope_aggregate_yes_ratio
33 |     higher_is_better: true
34 | metadata:
35 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/pope/pope_random.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/POPE
 2 | dataset_name: Full
 3 | dataset_kwargs:
 4 |   token: True
 5 | task: "pope_random"
 6 | test_split: random 
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.pope_doc_to_visual
 9 | doc_to_text: !function utils.pope_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 128
13 |   temperature: 0
14 |   top_p: 0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.pope_process_results
18 | metric_list:
19 |   - metric: pope_accuracy
20 |     aggregation: !function utils.pope_aggregate_accuracy
21 |     higher_is_better: true
22 |   - metric: pope_precision
23 |     aggregation: !function utils.pope_aggregate_precision
24 |     higher_is_better: true
25 |   - metric: pope_recall
26 |     aggregation: !function utils.pope_aggregate_recall
27 |     higher_is_better: true
28 |   - metric: pope_f1_score
29 |     aggregation: !function utils.pope_aggregate_f1_score
30 |     higher_is_better: true
31 |   - metric: pope_yes_ratio
32 |     aggregation: !function utils.pope_aggregate_yes_ratio
33 |     higher_is_better: true
34 | metadata:
35 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/realworldqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/realworldqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/refcoco+/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/_generate_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | # splits = ["train", "val", "testA", "testB"]
 5 | splits = ["val", "testA", "testB"]
 6 | tasks = ["seg", "bbox"]
 7 | 
 8 | if __name__ == "__main__":
 9 |     dump_tasks = []
10 |     for task in tasks:
11 |         for split in splits:
12 |             yaml_dict = {"group": f"refcoco+_{task}", "task": f"refcoco+_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split}
13 |             if split == "train":
14 |                 yaml_dict.pop("group")
15 |             else:
16 |                 dump_tasks.append(f"refcoco_{task}_{split}")
17 | 
18 |             save_path = f"./refcoco+_{task}_{split}.yaml"
19 |             print(f"Saving to {save_path}")
20 |             with open(save_path, "w") as f:
21 |                 yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
22 | 
23 |     group_dict = {"group": "refcoco+", "task": dump_tasks}
24 | 
25 |     with open("./_refcoco.yaml", "w") as f:
26 |         yaml.dump(group_dict, f, default_flow_style=False, indent=4)
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+
2 | task:
3 | - refcoco+_seg_val
4 | - refcoco+_seg_testA
5 | - refcoco+_seg_testB
6 | - refcoco+_bbox_val
7 | - refcoco+_bbox_testA
8 | - refcoco+_bbox_testB
9 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_testA
3 | include: _default_template_bbox_yaml
4 | test_split: testA
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_testB
3 | include: _default_template_bbox_yaml
4 | test_split: testB
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_bbox
2 | task: refcoco+_bbox_val
3 | include: _default_template_bbox_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_testA
3 | include: _default_template_seg_yaml
4 | test_split: testA
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_testB
3 | include: _default_template_seg_yaml
4 | test_split: testB
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco+_seg
2 | task: refcoco+_seg_val
3 | include: _default_template_seg_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/refcoco/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/_generate_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | # splits = ["train", "test", "val", "testA", "testB"]
 5 | splits = ["test", "val", "testA", "testB"]
 6 | tasks = ["seg", "bbox"]
 7 | 
 8 | if __name__ == "__main__":
 9 |     dump_tasks = []
10 |     for task in tasks:
11 |         for split in splits:
12 |             yaml_dict = {"group": f"refcoco_{task}", "task": f"refcoco_{task}_{split}", "test_split": split, "include": f"_default_template_{task}_yaml"}
13 |             if split == "train":
14 |                 yaml_dict.pop("group")
15 |             else:
16 |                 dump_tasks.append(f"refcoco_{task}_{split}")
17 | 
18 |             save_path = f"./refcoco_{task}_{split}.yaml"
19 |             print(f"Saving to {save_path}")
20 |             with open(save_path, "w") as f:
21 |                 yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
22 | 
23 |     group_dict = {"group": "refcoco", "task": dump_tasks}
24 | 
25 |     with open("./_refcoco.yaml", "w") as f:
26 |         yaml.dump(group_dict, f, default_flow_style=False, indent=4)
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/_refcoco.yaml:
--------------------------------------------------------------------------------
 1 | group: refcoco
 2 | task:
 3 | - refcoco_seg_test
 4 | - refcoco_seg_val
 5 | - refcoco_seg_testA
 6 | - refcoco_seg_testB
 7 | - refcoco_bbox_test
 8 | - refcoco_bbox_val
 9 | - refcoco_bbox_testA
10 | - refcoco_bbox_testB
11 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_test
3 | test_split: test
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_testA
3 | test_split: testA
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_testB
3 | test_split: testB
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_bbox
2 | task: refcoco_bbox_val
3 | test_split: val
4 | include: _default_template_bbox_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_test
3 | test_split: test
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_testA
3 | test_split: testA
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_testB
3 | test_split: testB
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcoco_seg
2 | task: refcoco_seg_val
3 | test_split: val
4 | include: _default_template_seg_yaml
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/refcocog/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/_generate_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | # splits = ["train", "test", "val"]
 5 | splits = ["test", "val"]
 6 | tasks = ["seg", "bbox"]
 7 | 
 8 | if __name__ == "__main__":
 9 |     dump_tasks = []
10 |     for task in tasks:
11 |         for split in splits:
12 |             yaml_dict = {"group": f"refcocog_{task}", "task": f"refcocog_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split}
13 |             if split == "train":
14 |                 yaml_dict.pop("group")
15 |             else:
16 |                 dump_tasks.append(f"refcoco_{task}_{split}")
17 | 
18 |             save_path = f"./refcocog_{task}_{split}.yaml"
19 |             print(f"Saving to {save_path}")
20 |             with open(save_path, "w") as f:
21 |                 yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
22 | 
23 |     group_dict = {"group": "refcocog", "task": dump_tasks}
24 | 
25 |     with open("./_refcoco.yaml", "w") as f:
26 |         yaml.dump(group_dict, f, default_flow_style=False, indent=4)
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/_refcoco.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog
2 | task:
3 | - refcocog_seg_test
4 | - refcocog_seg_val
5 | - refcocog_bbox_test
6 | - refcocog_bbox_val
7 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox
2 | task: refcocog_bbox_test
3 | include: _default_template_bbox_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_bbox
2 | task: refcocog_bbox_val
3 | include: _default_template_bbox_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg
2 | task: refcocog_seg_test
3 | include: _default_template_seg_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml:
--------------------------------------------------------------------------------
1 | group: refcocog_seg
2 | task: refcocog_seg_val
3 | include: _default_template_seg_yaml
4 | test_split: val
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/scienceqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/scienceqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/scienceqa/scienceqa.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ScienceQA
 2 | dataset_name: ScienceQA-FULL
 3 | task: "scienceqa"
 4 | dataset_kwargs:
 5 |   token: True
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.sqa_doc_to_visual
 9 | doc_to_text: !function utils.sqa_doc_to_text
10 | doc_to_target: !function utils.sqa_doc_to_target
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   temperature: 0
14 |   do_sample: False
15 | metric_list:
16 |   - metric: exact_match
17 |     aggregation: mean
18 |     higher_is_better: true
19 |     ignore_case: true
20 |     ignore_punctuation: true
21 | process_results: !function utils.sqa_process_results
22 | metadata:
23 |   - version: 0.0
24 | 
25 | model_specific_prompt_kwargs:
26 |   default:
27 |     format: default
28 |     pre_prompt: ""
29 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
30 |   qwen_vl:
31 |     format: qwen_vl
32 |   
33 | model_specific_generation_kwargs:
34 |   llava:
35 |     image_aspect_ratio: original
36 |   
37 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/scienceqa/scienceqa_full.yaml:
--------------------------------------------------------------------------------
1 | group: scienceqa_full
2 | task:
3 |   - scienceqa
4 |   - scienceqa_img


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/scienceqa/scienceqa_img.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ScienceQA
 2 | dataset_name: ScienceQA-IMG
 3 | task: "scienceqa_img"
 4 | dataset_kwargs:
 5 |   token: True
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.sqa_doc_to_visual
 9 | doc_to_text: !function utils.sqa_doc_to_text
10 | doc_to_target: !function utils.sqa_doc_to_target
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   temperature: 0
14 |   do_sample: False
15 | metric_list:
16 |   - metric: exact_match
17 |     aggregation: mean
18 |     higher_is_better: true
19 |     ignore_case: true
20 |     ignore_punctuation: true
21 | process_results: !function utils.sqa_process_results
22 | metadata:
23 |   - version: 0.0
24 | 
25 | model_specific_prompt_kwargs:
26 |   default:
27 |     format: default
28 |     pre_prompt: ""
29 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
30 |   qwen_vl:
31 |     format: qwen_vl
32 |   idefics2:
33 |     format: default
34 |     pre_prompt: ""
35 |     post_prompt: "\nAnswer:"
36 | model_specific_generation_kwargs:
37 |   llava:
38 |     image_aspect_ratio: original
39 |   
40 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/screenspot/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/__pycache__/utils_rec.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/screenspot/__pycache__/utils_rec.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/_default_template_reg_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: rootsautomation/ScreenSpot
 2 | output_type: generate_until
 3 | doc_to_visual: !function utils.screenspot_bbox_doc_to_visual
 4 | doc_to_text: !function utils.screenspot_doc_to_text
 5 | doc_to_target: "instruction"
 6 | generation_kwargs:
 7 |   until:
 8 |     - "ASSISTANT:"
 9 | process_results: !function utils.screenspot_process_result
10 | metric_list:
11 |   - metric: screenspot_CIDEr
12 |     aggregation : !function utils.screenspot_cider
13 |     higher_is_better : true
14 | metadata:
15 |   version: '0.0'


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/_screenspot.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot
2 | task:
3 | - screenspot_reg_test
4 | - screenspot_rec_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/screenspot_rec_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_rec
2 | task: screenspot_rec_test
3 | include: _default_template_rec_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/screenspot/screenspot_reg_test.yaml:
--------------------------------------------------------------------------------
1 | group: screenspot_reg
2 | task: screenspot_reg_test
3 | include: _default_template_reg_yaml
4 | test_split: test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/seedbench/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/seedbench/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/seedbench/seedbench.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/SEED-Bench
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "seedbench"
 5 | test_split: test
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.seed_doc_to_visual
 8 | doc_to_text: !function utils.seed_doc_to_text
 9 | doc_to_target: "answer"
10 | generation_kwargs:
11 |   until:
12 |     - "ASSISTANT:"
13 |   image_aspect_ratio: original
14 | # The return value of process_results will be used by metrics
15 | process_results: !function utils.seed_process_result
16 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
17 | metric_list:
18 |   - metric: seed_image
19 |     aggregation: !function utils.seed_aggregation_result
20 |     higher_is_better: true
21 |   - metric: seed_video
22 |     aggregation: !function utils.seed_aggregation_result
23 |     higher_is_better: true
24 |   - metric: seed_all
25 |     aggregation: !function utils.seed_aggregation_result
26 |     higher_is_better: true
27 | metadata:
28 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/seedbench/seedbench_ppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/SEED-Bench
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "seedbench_ppl"
 5 | test_split: test
 6 | output_type: multiple_choice
 7 | doc_to_visual: !function utils.seed_doc_to_visual
 8 | doc_to_text: !function utils.seed_doc_to_text_mc
 9 | doc_to_choice : !function utils.seed_doc_to_choice
10 | doc_to_target: !function utils.seed_doc_to_mc_target
11 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
12 | metric_list:
13 |   - metric: acc
14 | metadata:
15 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/seedbench_2/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/seedbench_2/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/stvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/stvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/stvqa/stvqa.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/ST-VQA
 2 | task: "stvqa"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.stvqa_doc_to_visual
 6 | doc_to_text: !function utils.stvqa_doc_to_text
 7 | doc_to_target: "answers"
 8 | generation_kwargs:
 9 |   max_new_tokens: 32
10 |   temperature: 0
11 |   do_sample: False
12 | process_results: !function utils.stvqa_process_results
13 | metric_list:
14 |   - metric: submission
15 |     aggregation: !function utils.stvqa_aggregate_submissions
16 | model_specific_prompt_kwargs:
17 |   default:
18 |     pre_prompt: ""
19 |     post_prompt: "\nAnswer the question using a single word or phrase."
20 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/stvqa/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import logging
 4 | 
 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
 6 | 
 7 | 
 8 | def stvqa_doc_to_text(doc, model_specific_prompt_kwargs):
 9 |     question = doc["question"]
10 |     pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
11 |     post_prompt = model_specific_prompt_kwargs["post_prompt"]
12 |     return f"{pre_prompt}{question}{post_prompt}"
13 | 
14 | 
15 | def stvqa_doc_to_visual(doc):
16 |     return [doc["image"].convert("RGB")]
17 | 
18 | 
19 | def stvqa_process_results(doc, results):
20 |     answer = results[0]
21 |     return {"submission": {"question_id": int(doc["question_id"]), "answer": answer}}
22 | 
23 | 
24 | def stvqa_aggregate_submissions(results, args):
25 |     file = generate_submission_file("stvqa_test_for_submission.json", args)
26 |     with open(file, "w") as f:
27 |         json.dump(results, f)
28 |     logging.getLogger("lmms-eval").info(f"Results saved to {file}")
29 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/synthdog/__pycache__/donut_evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/synthdog/__pycache__/donut_evaluator.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/synthdog/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/synthdog/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/synthdog/synthdog.yaml:
--------------------------------------------------------------------------------
1 | group: synthdog
2 | task:
3 | - synthdog_en
4 | - synthdog_zh


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/synthdog/synthdog_en.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: naver-clova-ix/synthdog-en
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "synthdog_en"
 5 | test_split: validation
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.synthdog_doc_to_visual
 8 | doc_to_text: OCR this image section by section, from top to bottom, and left to right. Do not insert line breaks in the output text. If a word is split due to a line break in the image, use a space instead.
 9 | doc_to_target: !function utils.synthdog_doc_to_target
10 | generation_kwargs:
11 |   max_new_tokens: 1024
12 |   temperature: 0
13 |   top_p: 1.0
14 |   num_beams: 1
15 |   do_sample: false
16 | process_results: !function utils.synthdog_process_results
17 | metric_list:
18 |   - metric: tree_edit_distance
19 |     aggregation: !function utils.synthdog_aggregate_ted
20 |     higher_is_better: true
21 | metadata:
22 |   - version: 0.0
23 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/synthdog/synthdog_zh.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: naver-clova-ix/synthdog-zh
 2 | dataset_kwargs:
 3 |   token: True
 4 | task: "synthdog_zh"
 5 | test_split: validation
 6 | output_type: generate_until
 7 | doc_to_visual: !function utils.synthdog_doc_to_visual
 8 | doc_to_text: OCR this image section by section, from top to bottom, and left to right. Do not insert line breaks in the output text. If a word is split due to a line break in the image, use a space instead.
 9 | doc_to_target: !function utils.synthdog_doc_to_target
10 | generation_kwargs:
11 |   max_new_tokens: 1024
12 |   temperature: 0
13 |   top_p: 1.0
14 |   num_beams: 1
15 |   do_sample: false
16 | process_results: !function utils.synthdog_process_results
17 | metric_list:
18 |   - metric: tree_edit_distance
19 |     aggregation: !function utils.synthdog_aggregate_ted
20 |     higher_is_better: true
21 | metadata:
22 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textcaps/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/textcaps/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml:
--------------------------------------------------------------------------------
1 | model_specific_prompt_kwargs:
2 |   default:
3 |     prompt: Provide a one-sentence caption for the provided image.


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textcaps/textcaps.yaml:
--------------------------------------------------------------------------------
1 | group : textcaps
2 | task:
3 |   - textcaps_val
4 |   - textcaps_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textcaps/textcaps_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/TextCaps
 2 | dataset_kwargs:
 3 |   token: True
 4 | task : "textcaps_test"
 5 | group : "textcaps_caption"
 6 | test_split: test
 7 | output_type: generate_until
 8 | doc_to_visual: !function utils.textcaps_doc_to_visual
 9 | doc_to_text: !function utils.textcaps_doc_to_text
10 | doc_to_target: "answer"
11 | generation_kwargs:
12 |   max_new_tokens: 64
13 |   temperature: 0
14 |   top_p: 1.0
15 |   num_beams: 1
16 |   do_sample: false
17 | process_results: !function utils.textcaps_test_process_result
18 | # Note that the metric name can be either a registered metric function (as is the case for GQA) or a key name returned by process_results
19 | metric_list:
20 |   - metric: textcaps_passthrough 
21 |     aggregation : !function utils.textcaps_test_aggregation_result
22 |     higher_is_better : true
23 | metadata:
24 |   - version: 0.0
25 | include: _default_template_textcaps_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textvqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/textvqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textvqa/_default_template_textvqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/textvqa
 2 | output_type: generate_until
 3 | doc_to_visual: !function utils.textvqa_doc_to_visual
 4 | doc_to_text: !function utils.textvqa_doc_to_text
 5 | doc_to_target: "answer"
 6 | generation_kwargs:
 7 |   until:
 8 |     - "ASSISTANT:"
 9 | process_results: !function utils.textvqa_process_results
10 | model_specific_prompt_kwargs:
11 |   default:
12 |     pre_prompt: ""
13 |     post_prompt: "\nAnswer the question using a single word or phrase."
14 |     ocr: false
15 |   qwen_vl:
16 |     pre_prompt: ""
17 |     post_prompt: " Answer:"
18 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textvqa/_textvqa.yaml:
--------------------------------------------------------------------------------
1 | group: textvqa
2 | task:
3 | - textvqa_val
4 | - textvqa_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textvqa/textvqa_test.yaml:
--------------------------------------------------------------------------------
1 | task: textvqa_test
2 | test_split: test
3 | metric_list:
4 |   - metric: submission
5 |     aggregation: !function utils.textvqa_aggregate_submissions
6 |     higher_is_better: true
7 | include: _default_template_textvqa_yaml
8 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/textvqa/textvqa_val.yaml:
--------------------------------------------------------------------------------
 1 | task: textvqa_val
 2 | test_split: validation
 3 | metric_list:
 4 |   - metric: exact_match
 5 |     aggregation: mean
 6 |     higher_is_better: true
 7 |     ignore_case: true
 8 |     ignore_punctuation: true
 9 |   - metric: submission
10 |     aggregation: !function utils.textvqa_aggregate_submissions
11 |     higher_is_better: true
12 | include: _default_template_textvqa_yaml
13 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vatex/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/vatex/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vatex/_vatex.yaml:
--------------------------------------------------------------------------------
1 | group : vatex
2 | task:
3 | - vatex_val_zh
4 | - vatex_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/vcr_wiki/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/_default_template_vcr_yaml:
--------------------------------------------------------------------------------
 1 | 
 2 | dataset_kwargs:
 3 |   token: True
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.vcr_doc_to_visual
 6 | doc_to_text: !function utils.vcr_doc_to_text
 7 | doc_to_target: "answer"
 8 | generation_kwargs:
 9 |   max_new_tokens: 120
10 |   temperature: 0
11 |   top_p: 0
12 |   num_beams: 1
13 |   do_sample: false
14 | # The return value of process_results will be used by metrics
15 | # Note that the metric name can be either a registered metric function (as is the case for GQA) or a key name returned by process_results
16 | metadata:
17 |   - version: 0.0.1


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-easy-test
 3 | task: "vcr_wiki_en_easy"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy_100.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-easy-test-100
 3 | task: "vcr_wiki_en_easy_100"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy_500.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-easy-test-500
 3 | task: "vcr_wiki_en_easy_500"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-hard-test
 3 | task: "vcr_wiki_en_hard"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard_100.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-hard-test-100
 3 | task: "vcr_wiki_en_hard_100"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard_500.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-en-hard-test-500
 3 | task: "vcr_wiki_en_hard_500"
 4 | test_split: test
 5 | process_results: !function utils.vcr_en_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "What is the covered texts in the image? Please restore the covered texts without outputting the explanations."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-easy-test
 3 | task: "vcr_wiki_zh_easy"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy_100.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-easy-test-100
 3 | task: "vcr_wiki_zh_easy_100"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy_500.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-easy-test-500
 3 | task: "vcr_wiki_zh_easy_500"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-hard-test
 3 | task: "vcr_wiki_zh_hard"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard_100.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-hard-test-100
 3 | task: "vcr_wiki_zh_hard_100"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard_500.yaml:
--------------------------------------------------------------------------------
 1 | "include": "_default_template_vcr_yaml"
 2 | dataset_path: vcr-org/VCR-wiki-zh-hard-test-500
 3 | task: "vcr_wiki_zh_hard_500"
 4 | test_split: test
 5 | process_results: !function utils.vcr_zh_process_results
 6 | metric_list:
 7 |   - metric: jaccard
 8 |     aggregation: !function utils.vcr_aggregate_jaccard
 9 |     higher_is_better: true
10 |   - metric: exact_match
11 |     aggregation: !function utils.vcr_aggregate_exact_match
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "图像中被覆盖的文本是什么？请在不输出解释的情况下还原被覆盖的文本。"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/video_detail_description/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/video_detail_description/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/video_detail_description/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoDetailDescription
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt
 6 | model_specific_prompt_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-0613


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/video_detail_description/video_detail_description.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Default"
 2 | task: "video_dc499"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.video_detail_description_doc_to_visual
 6 | doc_to_text: !function utils.video_detail_description_doc_to_text
 7 | doc_to_target: !function utils.video_detail_description_doc_to_answer
 8 | process_results: !function utils.video_detail_description_process_results_generic
 9 | metric_list:
10 |   - metric: gpt_eval_score
11 |     aggregation: !function utils.video_detail_description_aggregate_score
12 |     higher_is_better: true
13 | include: _default_template_yaml


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/videochatgpt/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/_default_template_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VideoChatGPT
 2 | dataset_kwargs:
 3 |   token: True
 4 |   video: True
 5 |   cache_dir: videochatgpt
 6 | model_specific_prompt_kwargs:
 7 |   default:
 8 |     pre_prompt: ""
 9 |     post_prompt: ""
10 | 
11 | metadata:
12 |   version: 0.0
13 |   gpt_eval_model_name: gpt-3.5-0613


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/_videochatgpt.yaml:
--------------------------------------------------------------------------------
1 | group: videochatgpt
2 | task:
3 | - videochatgpt_gen
4 | - videochatgpt_temporal
5 | - videochatgpt_consistency
6 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/videochatgpt_consistency.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Consistency"
 2 | task: "videochatgpt_consistency"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.videochatgpt_doc_to_visual
 6 | doc_to_text: !function utils.videochatgpt_doc_to_text_consistency
 7 | doc_to_target: !function utils.videochatgpt_doc_to_answer
 8 | process_results: !function utils.videochatgpt_process_results_consistency
 9 | metric_list:
10 |   - metric: gpt_eval_score_consistency
11 |     aggregation: !function utils.videochatgpt_aggregate_consistency
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | 
15 | generation_kwargs:
16 |   until:
17 |     - "ASSISTANT:"
18 |   image_aspect_ratio: original
19 |   max_new_tokens: 1024
20 |   temperature: 0
21 |   top_p: 1.0
22 |   num_beams: 1
23 |   do_sample: false
24 |   


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/videochatgpt_generic.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Generic"
 2 | task: "videochatgpt_gen"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.videochatgpt_doc_to_visual
 6 | doc_to_text: !function utils.videochatgpt_doc_to_text
 7 | doc_to_target: !function utils.videochatgpt_doc_to_answer
 8 | process_results: !function utils.videochatgpt_process_results_generic
 9 | metric_list:
10 |   - metric: gpt_eval_score_correctness
11 |     aggregation: !function utils.videochatgpt_aggregate_score
12 |     higher_is_better: true
13 |   - metric: gpt_eval_score_detailed_orientation
14 |     aggregation: !function utils.videochatgpt_aggregate_score
15 |     higher_is_better: true
16 |   - metric: gpt_eval_score_context
17 |     aggregation: !function utils.videochatgpt_aggregate_score
18 |     higher_is_better: true
19 | include: _default_template_yaml
20 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videochatgpt/videochatgpt_temporal.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Temporal"
 2 | task: "videochatgpt_temporal"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.videochatgpt_doc_to_visual
 6 | doc_to_text: !function utils.videochatgpt_doc_to_text
 7 | doc_to_target: !function utils.videochatgpt_doc_to_answer
 8 | process_results: !function utils.videochatgpt_process_results_temporal
 9 | metric_list:
10 |   - metric: gpt_eval_score_temporal
11 |     aggregation: !function utils.videochatgpt_aggregate_score
12 |     higher_is_better: true
13 | include: _default_template_yaml
14 | 
15 | generation_kwargs:
16 |   until:
17 |     - "ASSISTANT:"
18 |   image_aspect_ratio: original
19 |   max_new_tokens: 1024
20 |   temperature: 0
21 |   top_p: 1.0
22 |   num_beams: 1
23 |   do_sample: false
24 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videomme/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/videomme/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/videomme_fix/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/videomme_fix/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/vizwiz_vqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/_default_template_vqa_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VizWiz-VQA
 2 | output_type: generate_until
 3 | doc_to_visual: !function utils.vizwiz_vqa_doc_to_visual
 4 | doc_to_text: !function utils.vizwiz_vqa_doc_to_text
 5 | doc_to_target: "answer"
 6 | generation_kwargs:
 7 |   until:
 8 |     - "ASSISTANT:"
 9 | metadata:
10 |   - version: 0.0
11 | model_specific_prompt_kwargs:
12 |   default:
13 |     pre_prompt: ""
14 |     post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase."
15 | process_results: !function utils.vizwiz_vqa_process_results
16 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/_generate_config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | splits = ["val", "test"]  # dataset splits that get their own task YAML
 5 | tasks = ["vqa"]  # task suffixes; each one selects the _default_template_<task>_yaml include
 6 | 
 7 | if __name__ == "__main__":  # Script entry: writes one YAML per (task, split) plus a group YAML, all relative to the current directory.
 8 |     dump_tasks = []  # task names collected for the group file written at the end
 9 |     for task in tasks:
10 |         for split in splits:
11 |             yaml_dict = {"group": f"vizwiz_{task}", "task": f"vizwiz_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split}
12 |             if split == "train":  # never true with the splits listed above; a "train" split would get no group and stay out of the group file
13 |                 yaml_dict.pop("group")
14 |             else:
15 |                 dump_tasks.append(f"vizwiz_{task}_{split}")
16 | 
17 |             save_path = f"./vizwiz_{task}_{split}.yaml"
18 |             print(f"Saving to {save_path}")
19 |             with open(save_path, "w") as f:  # overwrites any previously generated config
20 |                 yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)  # sort_keys=False keeps the key order of yaml_dict
21 | 
22 |     group_dict = {"group": "vizwiz_vqa", "task": dump_tasks}  # group config listing every task appended above
23 | 
24 |     with open("./_vizwiz_vqa.yaml", "w") as f:
25 |         yaml.dump(group_dict, f, default_flow_style=False, indent=4)
26 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml:
--------------------------------------------------------------------------------
1 | group: vizwiz_vqa
2 | task:
3 | - vizwiz_vqa_val
4 | - vizwiz_vqa_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml:
--------------------------------------------------------------------------------
 1 | group: vizwiz_vqa
 2 | task: vizwiz_vqa_test
 3 | test_split: test
 4 | include: _default_template_vqa_yaml
 5 | process_results: !function utils.vizwiz_vqa_process_results
 6 | metric_list:
 7 |   # - metric: exact_match
 8 |   #   aggregation: mean
 9 |   #   higher_is_better: true
10 |   #   ignore_case: true
11 |   #   ignore_punctuation: true
12 |   - metric: submission
13 |     aggregation: !function utils.vizwiz_vqa_aggregate_submissions
14 |     higher_is_better: true
15 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml:
--------------------------------------------------------------------------------
 1 | group: vizwiz_vqa
 2 | task: vizwiz_vqa_val
 3 | test_split: val
 4 | include: _default_template_vqa_yaml
 5 | metric_list:
 6 |   - metric: exact_match
 7 |     aggregation: mean
 8 |     higher_is_better: true
 9 |     ignore_case: true
10 |     ignore_punctuation: true
11 |   # - metric: submission
12 |   #   aggregation: !function utils.vizwiz_vqa_aggregate_submissions
13 |   #   higher_is_better: true


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vqav2/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/vqav2/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vqav2/_default_template_vqav2_yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: lmms-lab/VQAv2
 2 | dataset_kwargs:
 3 |   token: True
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.vqav2_doc_to_visual
 6 | doc_to_text: !function utils.vqav2_doc_to_text
 7 | doc_to_target: "answer"
 8 | generation_kwargs:
 9 |   max_new_tokens: 16
10 | metadata:
11 |   - version: 0.0
12 | model_specific_prompt_kwargs:
13 |   default:
14 |     pre_prompt: ""
15 |     post_prompt: "\nAnswer the question using a single word or phrase."


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vqav2/_vqav2.yaml:
--------------------------------------------------------------------------------
1 | group: vqav2
2 | task:
3 | - vqav2_val
4 | - vqav2_test


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vqav2/vqav2_test.yaml:
--------------------------------------------------------------------------------
1 | task: "vqav2_test"
2 | include: _default_template_vqav2_yaml
3 | test_split: test
4 | metric_list:
5 |   - metric: submission
6 |     aggregation: !function utils.vqav2_aggregate_submissions
7 |     higher_is_better: true
8 | process_results: !function utils.vqav2_process_results_test
9 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/vqav2/vqav2_val.yaml:
--------------------------------------------------------------------------------
 1 | task: "vqav2_val"
 2 | include: _default_template_vqav2_yaml
 3 | test_split: validation
 4 | metric_list:
 5 |   - metric: exact_match
 6 |     aggregation: mean
 7 |     higher_is_better: true
 8 |     ignore_case: true
 9 |     ignore_punctuation: true
10 | process_results: !function utils.vqav2_process_results_val
11 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/websrc/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/websrc/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/websrc/websrc.yaml:
--------------------------------------------------------------------------------
1 | group: websrc
2 | task:
3 | - websrc_val
4 | - websrc_test
5 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/websrc/websrc_test.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: rootsautomation/websrc-test
 2 | task: "websrc_test"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.websrc_doc_to_visual
 6 | doc_to_text: !function utils.websrc_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils.websrc_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   image_aspect_ratio: pad
14 | metric_list:
15 |   - metric: submission
16 |     aggregation: !function utils.websrc_test_aggregate_results_for_submission
17 |     higher_is_better: true
18 | metadata:
19 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/websrc/websrc_val.yaml:
--------------------------------------------------------------------------------
 1 | dataset_path: rootsautomation/websrc
 2 | task: "websrc_val"
 3 | test_split: dev
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.websrc_doc_to_visual
 6 | doc_to_text: !function utils.websrc_doc_to_text
 7 | doc_to_target: "answer"
 8 | # The return value of process_results will be used by metrics
 9 | process_results: !function utils.websrc_process_results
10 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results
11 | generation_kwargs:
12 |   max_new_tokens: 16
13 |   image_aspect_ratio: pad
14 | metric_list:
15 |   - metric: websrc_squad_f1
16 |     aggregation: !function utils.websrc_aggregate_results
17 |     higher_is_better: true
18 | metadata:
19 |   - version: 0.0


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/worldqa/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/__pycache__/worldqa_mc_evaluator.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/worldqa/__pycache__/worldqa_mc_evaluator.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/worldqa
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: multi-hop-reasoning 
6 | metadata:
7 |   version: 0.0
8 |   gpt_eval_model_name: "gpt-4-0613"


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/worldqa.yaml:
--------------------------------------------------------------------------------
1 | group: worldqa
2 | task:
3 | - worldqa_gen
4 | - worldqa_mc
5 | 
6 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/worldqa_generation.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "Generation"
 2 | task: "worldqa_gen"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.worldqa_doc_to_visual
 6 | doc_to_text: !function utils.worldqa_doc_to_text
 7 | doc_to_target: !function utils.worldqa_doc_to_answer
 8 | process_results: !function utils.worldqa_process_results
 9 | metric_list:
10 |   - metric: submission
11 |     aggregation: !function utils.worldqa_aggregate_gen
12 |     higher_is_better: true
13 |   - metric: gpt_eval
14 |     aggregation: !function utils.worldq_gen_gpt_eval
15 |     higher_is_better: true  
16 | model_specific_prompt_kwargs:
17 |   default:
18 |     pre_prompt: ""
19 |     post_prompt: ""
20 | include: _default_template_yaml
21 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/worldqa_mc.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "MC"
 2 | task: "worldqa_mc"
 3 | test_split: test
 4 | output_type: generate_until
 5 | doc_to_visual: !function utils.worldqa_doc_to_visual
 6 | doc_to_text: !function utils.worldqa_doc_to_text
 7 | doc_to_target: !function utils.worldqa_doc_to_answer_mc
 8 | process_results: !function utils.worldqa_process_results_mc
 9 | metric_list:
10 |   - metric: gpt_eval
11 |     aggregation: !function utils.worldqa_aggregate_mc_eval
12 |     higher_is_better: true
13 | model_specific_prompt_kwargs:
14 |   default:
15 |     pre_prompt: ""
16 |     post_prompt: "\nAnswer with the option's letter from the given choices directly."
17 | filter_list:
18 |   - name: "flexible-extract"
19 |     filter:
20 |       - function: !function utils.MultiChoiceRegexFilter
21 |         group_select: 0
22 |         ignore_case: true
23 |         ignore_punctuation: true
24 |         regex_pattern: "(\\([A-Z]\\))"
25 | 
26 | include: _default_template_yaml
27 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/worldqa/worldqa_mcppl.yaml:
--------------------------------------------------------------------------------
 1 | dataset_name: "MC_PPL"
 2 | task: "worldqa_mc_ppl"
 3 | test_split: test
 4 | output_type: multiple_choice
 5 | doc_to_visual: !function utils.worldqa_doc_to_visual
 6 | doc_to_text: "question"
 7 | doc_to_target: !function utils.worldqa_doc_to_answer_mc_ppl
 8 | doc_to_choice: !function utils.worldqa_doc_to_choice
 9 | metric_list:
10 |   - metric: acc
11 | model_specific_prompt_kwargs:
12 |   default:
13 |     pre_prompt: ""
14 |     post_prompt: ""
15 | include: _default_template_yaml
16 | 


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/youcook2/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/lmms-eval/lmms_eval/tasks/youcook2/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/lmms-eval/lmms_eval/tasks/youcook2/_default_template_yaml:
--------------------------------------------------------------------------------
1 | dataset_path: lmms-lab/YouCook2
2 | dataset_kwargs:
3 |   token: True
4 |   video: True
5 |   cache_dir: YouCookIIVideos
6 | 


--------------------------------------------------------------------------------
/lmms-eval/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | # This is to make sure that the package supports editable installs
4 | setuptools.setup()
5 | 


--------------------------------------------------------------------------------
/oryx/__init__.py:
--------------------------------------------------------------------------------
1 | from .model import OryxLlamaForCausalLM
2 | 


--------------------------------------------------------------------------------
/oryx/constants.py:
--------------------------------------------------------------------------------
 1 | # Heartbeat timing constants. Units are not stated in this file --
 2 | # presumably seconds; TODO confirm against the code that reads them.
 3 | CONTROLLER_HEART_BEAT_EXPIRATION = 30
 4 | WORKER_HEART_BEAT_INTERVAL = 15
 5 | 
 6 | # "." is a relative path, so it resolves against the process's current
 7 | # working directory.
 8 | LOGDIR = "."
 9 | 
10 | # Model Constants
11 | # NOTE(review): -100 matches the default ``ignore_index`` of
12 | # torch.nn.CrossEntropyLoss; presumably used to mask label positions out of
13 | # the loss -- confirm at the call sites.
14 | IGNORE_INDEX = -100
15 | # NOTE(review): negative, so presumably a placeholder id that cannot collide
16 | # with a real vocabulary index -- confirm at the call sites.
17 | IMAGE_TOKEN_INDEX = -200
18 | # Literal marker strings; presumably the image-related special tokens used
19 | # in prompts -- confirm against the tokenizer / prompt-building code.
20 | DEFAULT_IMAGE_TOKEN = "<image>"
21 | DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
22 | DEFAULT_IM_START_TOKEN = "<im_start>"
23 | DEFAULT_IM_END_TOKEN = "<im_end>"
24 | 


--------------------------------------------------------------------------------
/oryx/model/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | AVAILABLE_MODELS = {
 4 |     "oryx_llama": "OryxLlamaForCausalLM, OryxConfig",
 5 |     "oryx_qwen": "OryxQwenForCausalLM, OryxQwenConfig",
 6 |     # Add other models as needed
 7 | }
 8 | 
 9 | for model_name, model_classes in AVAILABLE_MODELS.items():
10 |     try:
11 |         exec(f"from .language_model.{model_name} import {model_classes}")
12 |     except Exception as e:
13 |         raise e
14 |         print(f"Failed to import {model_name} from llava.language_model.{model_name}")
15 | 


--------------------------------------------------------------------------------
/oryx/model/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/__pycache__/__init__.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/__pycache__/oryx_arch.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/__pycache__/oryx_arch.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/language_model/__pycache__/oryx_llama.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/language_model/__pycache__/oryx_llama.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/language_model/__pycache__/oryx_qwen.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/language_model/__pycache__/oryx_qwen.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_encoder/__pycache__/oryx_vit.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_encoder/__pycache__/oryx_vit.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_encoder/builder.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from .oryx_vit import OryxViTWrapper
 3 | 
 4 | def build_vision_tower(vision_tower_cfg, **kwargs):
 5 |     vision_tower = getattr(vision_tower_cfg, 'vision_tower', getattr(vision_tower_cfg, 'mm_vision_tower', None))
 6 |     is_absolute_path_exists = os.path.exists(vision_tower)
 7 |     if "oryx_vit" in vision_tower:
 8 |         print(f"Buiding OryxViTWrapper from {vision_tower}...")
 9 |         path = vision_tower.split(":")[1]
10 |         return OryxViTWrapper(vision_tower, path=path, args=vision_tower_cfg, **kwargs)
11 |     else:
12 |         raise ValueError(f'Unknown vision tower: {vision_tower}')
13 | 


--------------------------------------------------------------------------------
/oryx/model/multimodal_projector/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_projector/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/builder.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/builder.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/masked_drop.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/masked_drop.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/perceiver.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/perceiver.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/qformer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/qformer.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/spatial_pool.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/spatial_pool.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/model/multimodal_resampler/__pycache__/vlm_attention.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Oryx-mllm/Oryx/8228719263cf99edcbc342caa99b1165be6e2f26/oryx/model/multimodal_resampler/__pycache__/vlm_attention.cpython-310.pyc


--------------------------------------------------------------------------------
/oryx/train/train_mem.py:
--------------------------------------------------------------------------------
1 | 
2 | from oryx.train.train import train
3 | 
4 | # Start training only when this file is executed as a script; importing it
5 | # does nothing beyond importing `train`.
6 | if __name__ == "__main__":
7 |     train()
8 | 


--------------------------------------------------------------------------------
/scripts/eval_image.sh:
--------------------------------------------------------------------------------
 1 | # Launch lmms-eval for the oryx_image model on the mmbench_en_dev task.
 2 | # The empty values below are placeholders to fill in before running.
 3 | export HF_HOME=""
 4 | export GPT_EVAL_VERSION=""
 5 | export OPENAI_API_KEY=""
 6 | export OPENAI_API_URL=""
 7 | 
 8 | # Resolution settings exported to the launched process; presumably read by
 9 | # the Oryx model code -- confirm there for their exact meaning.
10 | export LOWRES_RESIZE=384x32
11 | export VIDEO_RESIZE="0x64"
12 | export HIGHRES_BASE="0x32"
13 | export MAXRES=1536
14 | export MINRES=0
15 | export VIDEO_MAXRES=480
16 | export VIDEO_MINRES=288
17 | 
18 | MODEL_NAME=""
19 | MODEL_PATH=""
20 | 
21 | # Variable expansions are quoted so that a path or name containing spaces
22 | # stays a single argument.
23 | accelerate launch --num_processes=1 -m lmms_eval \
24 |     --model oryx_image \
25 |     --model_args "pretrained=$MODEL_PATH,mm_resampler_type=dynamic_compressor" \
26 |     --tasks mmbench_en_dev \
27 |     --batch_size 1 \
28 |     --log_samples \
29 |     --log_samples_suffix eval \
30 |     --output_path "./logs_eval/$MODEL_NAME" \
31 |     --verbosity DEBUG


--------------------------------------------------------------------------------
/scripts/eval_video.sh:
--------------------------------------------------------------------------------
 1 | # Launch lmms-eval for the oryx model on the videochatgpt_temporal task.
 2 | # The empty values below are placeholders to fill in before running.
 3 | export HF_HOME=""
 4 | 
 5 | export GPT_EVAL_VERSION=""
 6 | export OPENAI_API_KEY=""
 7 | export OPENAI_API_URL=""
 8 | 
 9 | # Resolution settings exported to the launched process; presumably read by
10 | # the Oryx model code -- confirm there for their exact meaning.
11 | export LOWRES_RESIZE=384x32
12 | export VIDEO_RESIZE="0x64"
13 | export HIGHRES_BASE="0x32"
14 | export MAXRES=1536
15 | export MINRES=0
16 | export VIDEO_MAXRES=480
17 | export VIDEO_MINRES=288
18 | 
19 | MODEL_NAME=""
20 | MODEL_PATH=""
21 | 
22 | # Variable expansions are quoted so that a path or name containing spaces
23 | # stays a single argument.
24 | accelerate launch --num_processes=1 -m lmms_eval \
25 |     --model oryx \
26 |     --model_args "pretrained=$MODEL_PATH,max_frames_num=64,mm_resampler_type=dynamic_compressor" \
27 |     --tasks videochatgpt_temporal \
28 |     --batch_size 1 \
29 |     --log_samples \
30 |     --log_samples_suffix eval \
31 |     --output_path "./logs_eval/$MODEL_NAME" \
32 |     --verbosity DEBUG


--------------------------------------------------------------------------------
/scripts/zero2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "fp16": {
 3 |         "enabled": "auto",
 4 |         "loss_scale": 0,
 5 |         "loss_scale_window": 1000,
 6 |         "initial_scale_power": 16,
 7 |         "hysteresis": 2,
 8 |         "min_loss_scale": 1
 9 |     },
10 |     "bf16": {
11 |         "enabled": "auto"
12 |     },
13 |     "train_micro_batch_size_per_gpu": "auto",
14 |     "train_batch_size": "auto",
15 |     "gradient_accumulation_steps": "auto",
16 |     "zero_optimization": {
17 |         "stage": 2,
18 |         "overlap_comm": true,
19 |         "contiguous_gradients": true,
20 |         "sub_group_size": 1e9,
21 |         "reduce_bucket_size": "auto"
22 |     }
23 | }


--------------------------------------------------------------------------------