├── README.md ├── VLMEvalKit ├── README.md ├── requirements.txt ├── requirements │ └── docs.txt ├── run.py ├── setup.py └── vlmeval │ ├── __init__.py │ ├── api │ ├── __init__.py │ ├── bailingmm.py │ ├── base.py │ ├── bluelm_v_api.py │ ├── claude.py │ ├── cloudwalk.py │ ├── gemini.py │ ├── glm_vision.py │ ├── gpt.py │ ├── hf_chat_model.py │ ├── hunyuan.py │ ├── jt_vl_chat.py │ ├── lmdeploy.py │ ├── qwen_api.py │ ├── qwen_vl_api.py │ ├── reka.py │ ├── sensechat_vision.py │ ├── siliconflow.py │ ├── stepai.py │ ├── taichu.py │ └── taiyi.py │ ├── config.py │ ├── dataset │ ├── __init__.py │ ├── cmmmu.py │ ├── dude.py │ ├── dynamath.py │ ├── image_base.py │ ├── image_caption.py │ ├── image_mcq.py │ ├── image_mt.py │ ├── image_vqa.py │ ├── image_yorn.py │ ├── longvideobench.py │ ├── miabench.py │ ├── mlvu.py │ ├── mmbench_video.py │ ├── mmgenbench.py │ ├── mmlongbench.py │ ├── mmmath.py │ ├── mvbench.py │ ├── slidevqa.py │ ├── tempcompass.py │ ├── text_base.py │ ├── text_mcq.py │ ├── utils │ │ ├── __init__.py │ │ ├── crpe.py │ │ ├── hrbench.py │ │ ├── judge_util.py │ │ ├── llavabench.py │ │ ├── longvideobench.py │ │ ├── mathv.py │ │ ├── mathverse.py │ │ ├── mathvista.py │ │ ├── mlvu.py │ │ ├── mmbench_video.py │ │ ├── mmdu.py │ │ ├── mmniah.py │ │ ├── mmvet.py │ │ ├── multiple_choice.py │ │ ├── mvbench.py │ │ ├── naturalbench.py │ │ ├── ocrbench.py │ │ ├── olympiadbench.py │ │ ├── qspatial.py │ │ ├── tablevqabench.py │ │ ├── tempcompass.py │ │ ├── videomme.py │ │ ├── vqa_eval.py │ │ └── yorn.py │ ├── vcr.py │ ├── video_base.py │ ├── video_concat_dataset.py │ ├── video_dataset_config.py │ ├── videomme.py │ └── wildvision.py │ ├── inference.py │ ├── inference_mt.py │ ├── inference_video.py │ ├── smp │ ├── __init__.py │ ├── file.py │ ├── log.py │ ├── misc.py │ └── vlm.py │ ├── tools.py │ ├── utils │ ├── __init__.py │ ├── matching_util.py │ ├── mp_util.py │ └── result_transfer.py │ └── vlm │ ├── __init__.py │ ├── base.py │ ├── internvl │ ├── __init__.py │ ├── internvl_chat.py │ └── utils.py │ ├── omnicaptioner │ ├── __init__.py │ ├── build_sys_prompt.py │ ├── model.py │ ├── prompt.py │ └── visual_utils.py │ ├── omnicaptioner_cot │ ├── __init__.py │ ├── build_sys_prompt.py │ ├── model.py │ └── prompt.py │ ├── qwen2_vl │ ├── __init__.py │ ├── model.py │ └── prompt.py │ └── qwencaptioner │ ├── __init__.py │ ├── build_sys_prompt.py │ ├── model.py │ └── prompt.py ├── assets ├── demo.jpg └── quantitative.jpg ├── data └── omni_caption_pretrain.yaml ├── dataset ├── __init__.py ├── cmmmu.py ├── dude.py ├── dynamath.py ├── image_base.py ├── image_caption.py ├── image_mcq.py ├── image_mt.py ├── image_vqa.py ├── image_yorn.py ├── longvideobench.py ├── miabench.py ├── mlvu.py ├── mmbench_video.py ├── mmgenbench.py ├── mmlongbench.py ├── mmmath.py ├── mvbench.py ├── slidevqa.py ├── tempcompass.py ├── text_base.py ├── text_mcq.py ├── utils │ ├── __init__.py │ ├── crpe.py │ ├── hrbench.py │ ├── judge_util.py │ ├── llavabench.py │ ├── longvideobench.py │ ├── mathv.py │ ├── mathverse.py │ ├── mathvista.py │ ├── mlvu.py │ ├── mmbench_video.py │ ├── mmdu.py │ ├── mmniah.py │ ├── mmvet.py │ ├── multiple_choice.py │ ├── mvbench.py │ ├── naturalbench.py │ ├── ocrbench.py │ ├── olympiadbench.py │ ├── qspatial.py │ ├── tablevqabench.py │ ├── tempcompass.py │ ├── videomme.py │ ├── vqa_eval.py │ └── yorn.py ├── vcr.py ├── video_base.py ├── video_concat_dataset.py ├── video_dataset_config.py ├── videomme.py └── wildvision.py ├── hostfile ├── pre_token_compute_add_tasktype.py ├── requirements.txt ├── scripts ├── finetune_caption_slurm.sh ├── merge_lora.sh ├── script.sh ├── zero2.json ├── zero3.json └── zero3_offload.json └── src ├── __init__.py ├── inference_single_image.py ├── inference_single_video.py ├── merge_lora_weights.py ├── training ├── __init__.py ├── dataset │ ├── __init__.py │ ├── build_sys_prompt.py │ ├── constants.py │ ├── data_multi_v_addsys_addcache.py │ ├── data_reader.py │ ├── dataset_from_xllmx.py │ ├── image_processing_qwen2_vl.py │ ├── processing_qwen2_vl.py │ └── visual_utils.py ├── dist_util.py ├── model │ ├── __init__.py │ └── qwen2_vl │ │ ├── __init__.py │ │ ├── configuration_qwen2_vl.py │ │ ├── image_processing_qwen2_vl.py │ │ ├── modeling_qwen2_vl_new.py │ │ └── processing_qwen2_vl.py ├── params.py ├── patch │ ├── __init__.py │ ├── llama2_flash_attn_monkey_patch.py │ ├── llama_flash_attn_monkey_patch.py │ ├── llama_rmsnorm_monkey_patch.py │ ├── pad_data_collator.py │ ├── train_sampler_patch.py │ └── train_sampler_patch_re.py ├── train.py ├── train_caption.py ├── train_utils.py └── trainer.py ├── utils.py └── visual_utils_qwen25vl.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/README.md -------------------------------------------------------------------------------- /VLMEvalKit/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/README.md -------------------------------------------------------------------------------- /VLMEvalKit/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/requirements.txt -------------------------------------------------------------------------------- /VLMEvalKit/requirements/docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/requirements/docs.txt -------------------------------------------------------------------------------- /VLMEvalKit/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/run.py -------------------------------------------------------------------------------- /VLMEvalKit/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/setup.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/bailingmm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/bailingmm.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/base.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/bluelm_v_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/bluelm_v_api.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/claude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/claude.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/cloudwalk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/cloudwalk.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/gemini.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/gemini.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/glm_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/glm_vision.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/gpt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/hf_chat_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/hf_chat_model.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/hunyuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/hunyuan.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/jt_vl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/jt_vl_chat.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/lmdeploy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/lmdeploy.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/qwen_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/qwen_api.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/qwen_vl_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/qwen_vl_api.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/reka.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/reka.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/sensechat_vision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/sensechat_vision.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/siliconflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/siliconflow.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/stepai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/stepai.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/taichu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/taichu.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/api/taiyi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/api/taiyi.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/config.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/cmmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/cmmmu.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/dude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/dude.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/dynamath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/dynamath.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_base.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_caption.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_mcq.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_mt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_mt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_vqa.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/image_yorn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/image_yorn.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/longvideobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/longvideobench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/miabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/miabench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mlvu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mlvu.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mmbench_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mmbench_video.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mmgenbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mmgenbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mmlongbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mmlongbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mmmath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mmmath.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/mvbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/slidevqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/slidevqa.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/tempcompass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/tempcompass.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/text_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/text_base.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/text_mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/text_mcq.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/crpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/crpe.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/hrbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/hrbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/judge_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/judge_util.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/llavabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/llavabench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/longvideobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/longvideobench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mathv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mathv.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mathverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mathverse.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mathvista.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mlvu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mlvu.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mmbench_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mmbench_video.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mmdu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mmdu.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mmniah.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mmniah.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mmvet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mmvet.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/multiple_choice.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/mvbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/naturalbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/naturalbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/ocrbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/ocrbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/olympiadbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/olympiadbench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/qspatial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/qspatial.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/tablevqabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/tablevqabench.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/tempcompass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/tempcompass.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/videomme.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/vqa_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/vqa_eval.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/utils/yorn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/utils/yorn.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/vcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/vcr.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/video_base.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/video_concat_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/video_concat_dataset.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/video_dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/video_dataset_config.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/videomme.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/dataset/wildvision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/dataset/wildvision.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/inference.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/inference_mt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/inference_mt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/inference_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/inference_video.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/smp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/smp/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/smp/file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/smp/file.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/smp/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/smp/log.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/smp/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/smp/misc.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/smp/vlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/smp/vlm.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/tools.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/utils/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/utils/matching_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/utils/matching_util.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/utils/mp_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/utils/mp_util.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/utils/result_transfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/utils/result_transfer.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/base.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/internvl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/internvl/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/internvl/internvl_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/internvl/internvl_chat.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/internvl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/internvl/utils.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner/build_sys_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner/build_sys_prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner/model.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner/prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner/visual_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner/visual_utils.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/build_sys_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/build_sys_prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/model.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/omnicaptioner_cot/prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwen2_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwen2_vl/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwen2_vl/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwen2_vl/model.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwen2_vl/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwen2_vl/prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwencaptioner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwencaptioner/__init__.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwencaptioner/build_sys_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwencaptioner/build_sys_prompt.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwencaptioner/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwencaptioner/model.py -------------------------------------------------------------------------------- /VLMEvalKit/vlmeval/vlm/qwencaptioner/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/VLMEvalKit/vlmeval/vlm/qwencaptioner/prompt.py -------------------------------------------------------------------------------- /assets/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/assets/demo.jpg -------------------------------------------------------------------------------- /assets/quantitative.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/assets/quantitative.jpg -------------------------------------------------------------------------------- /data/omni_caption_pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/data/omni_caption_pretrain.yaml -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/cmmmu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/cmmmu.py -------------------------------------------------------------------------------- /dataset/dude.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/dude.py -------------------------------------------------------------------------------- /dataset/dynamath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/dynamath.py -------------------------------------------------------------------------------- /dataset/image_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_base.py -------------------------------------------------------------------------------- /dataset/image_caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_caption.py -------------------------------------------------------------------------------- /dataset/image_mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_mcq.py -------------------------------------------------------------------------------- /dataset/image_mt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_mt.py -------------------------------------------------------------------------------- /dataset/image_vqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_vqa.py -------------------------------------------------------------------------------- /dataset/image_yorn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/image_yorn.py -------------------------------------------------------------------------------- /dataset/longvideobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/longvideobench.py -------------------------------------------------------------------------------- /dataset/miabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/miabench.py -------------------------------------------------------------------------------- /dataset/mlvu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mlvu.py -------------------------------------------------------------------------------- /dataset/mmbench_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mmbench_video.py -------------------------------------------------------------------------------- /dataset/mmgenbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mmgenbench.py -------------------------------------------------------------------------------- /dataset/mmlongbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mmlongbench.py -------------------------------------------------------------------------------- /dataset/mmmath.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mmmath.py -------------------------------------------------------------------------------- /dataset/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/mvbench.py -------------------------------------------------------------------------------- /dataset/slidevqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/slidevqa.py -------------------------------------------------------------------------------- /dataset/tempcompass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/tempcompass.py -------------------------------------------------------------------------------- /dataset/text_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/text_base.py -------------------------------------------------------------------------------- /dataset/text_mcq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/text_mcq.py -------------------------------------------------------------------------------- /dataset/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/__init__.py -------------------------------------------------------------------------------- /dataset/utils/crpe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/crpe.py -------------------------------------------------------------------------------- /dataset/utils/hrbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/hrbench.py -------------------------------------------------------------------------------- /dataset/utils/judge_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/judge_util.py -------------------------------------------------------------------------------- /dataset/utils/llavabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/llavabench.py -------------------------------------------------------------------------------- /dataset/utils/longvideobench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/longvideobench.py -------------------------------------------------------------------------------- /dataset/utils/mathv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mathv.py -------------------------------------------------------------------------------- /dataset/utils/mathverse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mathverse.py -------------------------------------------------------------------------------- /dataset/utils/mathvista.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mathvista.py -------------------------------------------------------------------------------- /dataset/utils/mlvu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mlvu.py -------------------------------------------------------------------------------- /dataset/utils/mmbench_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mmbench_video.py -------------------------------------------------------------------------------- /dataset/utils/mmdu.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mmdu.py -------------------------------------------------------------------------------- /dataset/utils/mmniah.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mmniah.py -------------------------------------------------------------------------------- /dataset/utils/mmvet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mmvet.py -------------------------------------------------------------------------------- /dataset/utils/multiple_choice.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/multiple_choice.py -------------------------------------------------------------------------------- /dataset/utils/mvbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/mvbench.py -------------------------------------------------------------------------------- /dataset/utils/naturalbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/naturalbench.py -------------------------------------------------------------------------------- /dataset/utils/ocrbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/ocrbench.py -------------------------------------------------------------------------------- /dataset/utils/olympiadbench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/olympiadbench.py -------------------------------------------------------------------------------- /dataset/utils/qspatial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/qspatial.py -------------------------------------------------------------------------------- /dataset/utils/tablevqabench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/tablevqabench.py -------------------------------------------------------------------------------- /dataset/utils/tempcompass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/tempcompass.py -------------------------------------------------------------------------------- /dataset/utils/videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/videomme.py -------------------------------------------------------------------------------- /dataset/utils/vqa_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/vqa_eval.py -------------------------------------------------------------------------------- /dataset/utils/yorn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/utils/yorn.py -------------------------------------------------------------------------------- /dataset/vcr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/vcr.py -------------------------------------------------------------------------------- /dataset/video_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/video_base.py -------------------------------------------------------------------------------- /dataset/video_concat_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/video_concat_dataset.py -------------------------------------------------------------------------------- /dataset/video_dataset_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/video_dataset_config.py -------------------------------------------------------------------------------- /dataset/videomme.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/videomme.py -------------------------------------------------------------------------------- /dataset/wildvision.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/dataset/wildvision.py -------------------------------------------------------------------------------- /hostfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/hostfile -------------------------------------------------------------------------------- /pre_token_compute_add_tasktype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/pre_token_compute_add_tasktype.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/finetune_caption_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/scripts/finetune_caption_slurm.sh -------------------------------------------------------------------------------- /scripts/merge_lora.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/scripts/merge_lora.sh -------------------------------------------------------------------------------- /scripts/script.sh: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/zero2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/scripts/zero2.json -------------------------------------------------------------------------------- /scripts/zero3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/scripts/zero3.json -------------------------------------------------------------------------------- /scripts/zero3_offload.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/scripts/zero3_offload.json -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/inference_single_image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/inference_single_image.py -------------------------------------------------------------------------------- /src/inference_single_video.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/inference_single_video.py -------------------------------------------------------------------------------- /src/merge_lora_weights.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/merge_lora_weights.py -------------------------------------------------------------------------------- /src/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/training/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/training/dataset/build_sys_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/build_sys_prompt.py -------------------------------------------------------------------------------- /src/training/dataset/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/constants.py -------------------------------------------------------------------------------- /src/training/dataset/data_multi_v_addsys_addcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/data_multi_v_addsys_addcache.py -------------------------------------------------------------------------------- /src/training/dataset/data_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/data_reader.py -------------------------------------------------------------------------------- /src/training/dataset/dataset_from_xllmx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/dataset_from_xllmx.py -------------------------------------------------------------------------------- /src/training/dataset/image_processing_qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/image_processing_qwen2_vl.py -------------------------------------------------------------------------------- /src/training/dataset/processing_qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/processing_qwen2_vl.py -------------------------------------------------------------------------------- /src/training/dataset/visual_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dataset/visual_utils.py -------------------------------------------------------------------------------- /src/training/dist_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/dist_util.py -------------------------------------------------------------------------------- /src/training/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/training/model/qwen2_vl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/model/qwen2_vl/__init__.py -------------------------------------------------------------------------------- /src/training/model/qwen2_vl/configuration_qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/model/qwen2_vl/configuration_qwen2_vl.py -------------------------------------------------------------------------------- /src/training/model/qwen2_vl/image_processing_qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/model/qwen2_vl/image_processing_qwen2_vl.py -------------------------------------------------------------------------------- /src/training/model/qwen2_vl/modeling_qwen2_vl_new.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/model/qwen2_vl/modeling_qwen2_vl_new.py -------------------------------------------------------------------------------- /src/training/model/qwen2_vl/processing_qwen2_vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/model/qwen2_vl/processing_qwen2_vl.py -------------------------------------------------------------------------------- /src/training/params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/params.py -------------------------------------------------------------------------------- /src/training/patch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/__init__.py -------------------------------------------------------------------------------- /src/training/patch/llama2_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/llama2_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /src/training/patch/llama_flash_attn_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/llama_flash_attn_monkey_patch.py -------------------------------------------------------------------------------- /src/training/patch/llama_rmsnorm_monkey_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/llama_rmsnorm_monkey_patch.py -------------------------------------------------------------------------------- /src/training/patch/pad_data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/pad_data_collator.py -------------------------------------------------------------------------------- /src/training/patch/train_sampler_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/train_sampler_patch.py -------------------------------------------------------------------------------- /src/training/patch/train_sampler_patch_re.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/patch/train_sampler_patch_re.py -------------------------------------------------------------------------------- /src/training/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/train.py -------------------------------------------------------------------------------- /src/training/train_caption.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/train_caption.py -------------------------------------------------------------------------------- /src/training/train_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/train_utils.py -------------------------------------------------------------------------------- /src/training/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/training/trainer.py -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/utils.py -------------------------------------------------------------------------------- /src/visual_utils_qwen25vl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternScience/OmniCaptioner/HEAD/src/visual_utils_qwen25vl.py --------------------------------------------------------------------------------