├── Eagle ├── lmms_eval │ ├── __init__.py │ ├── api │ │ ├── __init__.py │ │ ├── instance.py │ │ └── filter.py │ ├── tasks │ │ ├── _task_utils │ │ │ ├── gpt_eval_utils.py │ │ │ └── file_utils.py │ │ ├── ok_vqa │ │ │ ├── _ok_vqa.yaml │ │ │ ├── ok_vqa_val2014.yaml │ │ │ ├── _default_template_vqa_yaml │ │ │ ├── _generate_config.py │ │ │ └── utils.py │ │ ├── flickr30k │ │ │ ├── flickr30k.yaml │ │ │ └── flickr30k_test.yaml │ │ ├── mmmu │ │ │ ├── mmmu.yaml │ │ │ ├── mmmu_val.yaml │ │ │ └── mmmu_test.yaml │ │ ├── docvqa │ │ │ ├── docvqa.yaml │ │ │ ├── docvqa_val.yaml │ │ │ ├── docvqa_test.yaml │ │ │ ├── _default_template_docvqa_yaml │ │ │ └── utils.py │ │ ├── vqav2 │ │ │ ├── _vqav2.yaml │ │ │ ├── vqav2_test.yaml │ │ │ ├── vqav2_val.yaml │ │ │ └── _default_template_vqav2_yaml │ │ ├── cmmmu │ │ │ ├── _cmmmu.yaml │ │ │ ├── _default_template_cmmmu_yaml │ │ │ ├── cmmmu_test.yaml │ │ │ └── cmmmu_val.yaml │ │ ├── iconqa │ │ │ ├── iconqa.yaml │ │ │ ├── iconqa_test.yaml │ │ │ ├── iconqa_val.yaml │ │ │ ├── _default_template_docvqa_yaml │ │ │ └── utils.py │ │ ├── nocaps │ │ │ ├── nocaps.yaml │ │ │ ├── _default_template_nocaps_yaml │ │ │ ├── nocaps_test.yaml │ │ │ └── nocaps_val.yaml │ │ ├── infovqa │ │ │ ├── infovqa.yaml │ │ │ ├── infovqa_val.yaml │ │ │ ├── infovqa_test.yaml │ │ │ ├── _default_template_infovqa_yaml │ │ │ └── utils.py │ │ ├── textvqa │ │ │ ├── _textvqa.yaml │ │ │ ├── textvqa_test.yaml │ │ │ ├── textvqa_val.yaml │ │ │ ├── _default_template_textvqa_yaml │ │ │ └── utils.py │ │ ├── textcaps │ │ │ ├── textcaps.yaml │ │ │ ├── _default_template_textcaps_yaml │ │ │ ├── textcaps_test.yaml │ │ │ ├── textcaps_val.yaml │ │ │ └── textcaps_train.yaml │ │ ├── scienceqa │ │ │ ├── scienceqa_full.yaml │ │ │ ├── scienceqa.yaml │ │ │ ├── scienceqa_img.yaml │ │ │ └── utils.py │ │ ├── vizwiz_vqa │ │ │ ├── _vizwiz_vqa.yaml │ │ │ ├── vizwiz_vqa_val.yaml │ │ │ ├── vizwiz_vqa_test.yaml │ │ │ ├── _default_template_vqa_yaml │ │ │ ├── _generate_config.py │ │ │ └── utils.py │ │ ├── multidocvqa │ │ │ ├── multidocvqa.yaml │ │ │ ├── multidocvqa_test.yaml │ │ │ └── multidocvqa_val.yaml │ │ ├── coco_cap │ │ │ ├── coco2014_cap.yaml │ │ │ ├── coco2017_cap.yaml │ │ │ ├── coco_cap.yaml │ │ │ ├── coco2017_cap_test.yaml │ │ │ ├── coco2014_cap_test.yaml │ │ │ ├── coco2017_cap_val.yaml │ │ │ └── coco2014_cap_val.yaml │ │ ├── refcoco+ │ │ │ ├── refcoco+_seg_val.yaml │ │ │ ├── refcoco+_bbox_val.yaml │ │ │ ├── refcoco+_bbox_testA.yaml │ │ │ ├── refcoco+_bbox_testB.yaml │ │ │ ├── refcoco+_seg_testA.yaml │ │ │ ├── refcoco+_seg_testB.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_seg_yaml │ │ │ └── _default_template_bbox_yaml │ │ ├── refcoco │ │ │ ├── refcoco_bbox_val.yaml │ │ │ ├── refcoco_seg_test.yaml │ │ │ ├── refcoco_seg_val.yaml │ │ │ ├── refcoco_bbox_test.yaml │ │ │ ├── refcoco_seg_testA.yaml │ │ │ ├── refcoco_seg_testB.yaml │ │ │ ├── refcoco_bbox_testA.yaml │ │ │ ├── refcoco_bbox_testB.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_seg_yaml │ │ │ └── _default_template_bbox_yaml │ │ ├── refcocog │ │ │ ├── refcocog_seg_val.yaml │ │ │ ├── refcocog_bbox_val.yaml │ │ │ ├── refcocog_seg_test.yaml │ │ │ ├── _refcoco.yaml │ │ │ ├── refcocog_bbox_test.yaml │ │ │ ├── _generate_config.py │ │ │ ├── _default_template_bbox_yaml │ │ │ └── _default_template_seg_yaml │ │ ├── olympiadbench │ │ │ ├── olympiadbench.yaml │ │ │ ├── olympiadbench_test_cn.yaml │ │ │ ├── olympiadbench_test_en.yaml │ │ │ └── cn_utils.py │ │ ├── mmbench │ │ │ ├── mmbench_cn.yaml │ │ │ ├── 
mmbench_en.yaml │ │ │ ├── mmbench_cn_test.yaml │ │ │ ├── mmbench_en_test.yaml │ │ │ ├── mmbench.yaml │ │ │ ├── mmbench_cn_dev.yaml │ │ │ ├── mmbench_en_dev.yaml │ │ │ ├── _default_template_mmbench_cn_yaml │ │ │ ├── _default_template_mmbench_en_yaml │ │ │ └── mmbench_cc.yaml │ │ ├── mathvista │ │ │ ├── mathvista.yaml │ │ │ ├── mathvista_test.yaml │ │ │ └── mathvista_testmini.yaml │ │ ├── seedbench │ │ │ ├── seedbench_ppl.yaml │ │ │ ├── seedbench.yaml │ │ │ └── utils.py │ │ ├── stvqa │ │ │ ├── stvqa.yaml │ │ │ └── utils.py │ │ ├── ocrbench │ │ │ └── ocrbench.yaml │ │ ├── gqa │ │ │ ├── gqa.yaml │ │ │ └── utils.py │ │ ├── chartqa │ │ │ ├── chartqa.yaml │ │ │ └── utils.py │ │ ├── ai2d │ │ │ ├── ai2d.yaml │ │ │ └── utils.py │ │ ├── mmvet │ │ │ └── mmvet.yaml │ │ ├── pope │ │ │ └── pope.yaml │ │ ├── mme │ │ │ └── mme.yaml │ │ ├── llava-bench-coco │ │ │ └── llava-bench-coco.yaml │ │ ├── ferret │ │ │ └── ferret.yaml │ │ ├── llava-in-the-wild │ │ │ └── llava-in-the-wild.yaml │ │ ├── hallusion_bench │ │ │ └── hallusion_bench_image.yaml │ │ └── seedbench_2 │ │ │ ├── seedbench_2.yaml │ │ │ └── utils.py │ ├── models │ │ └── __init__.py │ └── filters │ │ ├── decontamination.py │ │ ├── __init__.py │ │ ├── transformation.py │ │ ├── selection.py │ │ └── extraction.py ├── eagle │ ├── model │ │ ├── language_model │ │ │ └── __init__.py │ │ ├── multimodal_encoder │ │ │ ├── __init__.py │ │ │ ├── vision_models │ │ │ │ └── __init__.py │ │ │ └── builder.py │ │ ├── multimodal_projector │ │ │ ├── __init__.py │ │ │ └── builder.py │ │ ├── __init__.py │ │ └── consolidate.py │ ├── __init__.py │ └── constants.py ├── Eagle.pdf ├── assets │ ├── Eagle.png │ ├── Logo.png │ ├── nvidia.jpeg │ ├── eagle-logo.png │ ├── fig-teaser.jpg │ ├── georgia-tech.jpeg │ ├── visual │ │ ├── AV1.png │ │ ├── AV2.png │ │ ├── Doc1.png │ │ ├── Doc2.png │ │ ├── Doc3.png │ │ ├── VQA1.png │ │ ├── VQA2.png │ │ └── VQA3.png │ ├── animal-compare.png │ ├── health-insurance.png │ └── leasing-apartment.png ├── train_mem.py ├── scripts │ ├── convert_mmvet_for_eval.py │ ├── eval_lmms_eval │ │ ├── eval-vizwiz-vqav2.sh │ │ ├── eval-mmbench-mathvista.sh │ │ └── eval-mme-seed-pope-sqa-gqa-ocrbench-textvqa-chartqa.sh │ ├── convert_gqa_for_eval.py │ ├── eval │ │ ├── mmmu.sh │ │ ├── pope.sh │ │ ├── textvqa.sh │ │ ├── vizwiz.sh │ │ ├── sqa.sh │ │ ├── mme.sh │ │ ├── vqav2.sh │ │ └── gqa.sh │ ├── zero2.json │ ├── zero3.json │ ├── convert_mmbench_for_submission.py │ ├── zero3_offload.json │ ├── convert_vizwiz_for_submission.py │ ├── pretrain-eagle-x4-vicuna-13b.sh │ ├── pretrain-eagle-x4-vicuna-7b.sh │ ├── pretrain-eagle-x5-vicuna-7b.sh │ ├── pretrain_eagle_x5_vicuna_7b.sh │ ├── pretrain-eagle-x5-vicuna-13b.sh │ ├── pretrain-eagle-x5-llama3-8b.sh │ ├── finetune-eagle-x4-vicuna-13b-1.8m.sh │ ├── finetune-eagle-x4-vicuna-7b-1.8m.sh │ ├── finetune-eagle-x5-vicuna-7b-1.8m.sh │ ├── finetune-eagle-x5-vicuna-13b-1.8m.sh │ ├── pretrain-eagle-x5-yi34b-cambrian.sh │ ├── finetune-eagle-x5-llama3-8b-1.8m.sh │ ├── finetune-eagle-x5-yi-34b-cambrian-7m.sh │ └── convert_vqav2_for_submission.py ├── setup.py └── requirements.txt ├── Eagle2_5 ├── eaglevl │ ├── model │ │ ├── __init__.py │ │ └── eagle2_5 │ │ │ └── __init__.py │ ├── train │ │ ├── __init__.py │ │ └── constants.py │ ├── sp_utils │ │ ├── __init__.py │ │ ├── ring │ │ │ └── __init__.py │ │ └── attention.py │ └── patch │ │ ├── __init__.py │ │ └── fused_monkey_patch.py ├── Eagle2.pdf ├── Eagle2.5.pdf ├── streamlit_demo │ ├── gallery │ │ └── prod_1.jpeg │ ├── static │ │ └── SimHei.ttf │ ├── .streamlit │ │ └── config.toml │ ├── 
start_demo.sh │ ├── lasting_demo.sh │ ├── constants.py │ ├── forward_port.sh │ └── sd_worker.py ├── document │ ├── 4.streamlit_demo.md │ ├── 0.onboarding.md │ ├── how_to_use_lmdb_to_read_images.md │ ├── 1.installing.md │ └── 3.training.md ├── shell │ └── submit_prepare_job.sh ├── deepspeed_configs │ ├── zero_stage1_config_nooptim.json │ ├── zero_stage2_config.json │ ├── zero_stage1_config.json │ └── zero_stage3_config.json ├── deployment │ └── setup_x86.dockerfile ├── pyproject.toml └── README.md ├── .gitattributes └── .gitignore /Eagle/lmms_eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/language_model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_projector/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/_task_utils/gpt_eval_utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/vision_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Eagle/eagle/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import EagleLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /Eagle/Eagle.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle/Eagle.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_ok_vqa.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: 3 | - ok_vqa_val2014 -------------------------------------------------------------------------------- /Eagle2_5/Eagle2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle2_5/Eagle2.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/flickr30k/flickr30k.yaml: 
-------------------------------------------------------------------------------- 1 | group: flickr30k 2 | task: 3 | - flickr30k_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu.yaml: -------------------------------------------------------------------------------- 1 | group: mmmu 2 | task: 3 | - mmmu_val 4 | - mmmu_test 5 | -------------------------------------------------------------------------------- /Eagle2_5/Eagle2.5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/Eagle/HEAD/Eagle2_5/Eagle2.5.pdf -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa.yaml: -------------------------------------------------------------------------------- 1 | group: docvqa 2 | task: 3 | - docvqa_val 4 | - docvqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/_vqav2.yaml: -------------------------------------------------------------------------------- 1 | group: vqav2 2 | task: 3 | - vqav2_val 4 | - vqav2_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/_cmmmu.yaml: -------------------------------------------------------------------------------- 1 | group: cmmmu 2 | task: 3 | - cmmmu_val 4 | - cmmmu_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa.yaml: -------------------------------------------------------------------------------- 1 | group: iconqa 2 | task: 3 | - iconqa_val 4 | - iconqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps.yaml: -------------------------------------------------------------------------------- 1 | group : nocaps 2 | task: 3 | - nocaps_test 4 | - nocaps_val -------------------------------------------------------------------------------- /Eagle/eagle/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .language_model.eagle_llama import EagleLlamaForCausalLM, EagleConfig 2 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa.yaml: -------------------------------------------------------------------------------- 1 | group: infovqa 2 | task: 3 | - infovqa_val 4 | - infovqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/_textvqa.yaml: -------------------------------------------------------------------------------- 1 | group: textvqa 2 | task: 3 | - textvqa_val 4 | - textvqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps.yaml: -------------------------------------------------------------------------------- 1 | group : textcaps 2 | task: 3 | - textcaps_val 4 | - textcaps_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa_full.yaml: -------------------------------------------------------------------------------- 1 | group: scienceqa_full 2 | task: 3 | - scienceqa 4 | - scienceqa_img -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml: 
-------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: 3 | - vizwiz_vqa_val 4 | - vizwiz_vqa_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_test" 2 | test_split: test 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/iconqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "iconqa_val" 2 | test_split: val 3 | include: _default_template_docvqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa.yaml: -------------------------------------------------------------------------------- 1 | group: multidocvqa 2 | task: 3 | - multidocvqa_val 4 | - multidocvqa_test 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2014_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2017_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco2017_cap 2 | task: 3 | - coco2017_cap_val 4 | - coco2017_cap_test -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml: -------------------------------------------------------------------------------- 1 | group: ok_vqa 2 | task: ok_vqa_val2014 3 | test_split: val2014 4 | include: _default_template_vqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_val 3 | include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_val 3 | test_split: val 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_test 3 | test_split: test 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_val 3 | test_split: val 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_seg_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_val 3 | 
include: _default_template_seg_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/assets/Eagle.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:486012132ba08a5661c3ceaa45916e5acfaf863d495635010abc6a31f8f25aa1 3 | size 1829682 4 | -------------------------------------------------------------------------------- /Eagle/assets/Logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:15c5c3fe6143e7b6f11a2ca9f58a63e3803a8fa1c521bc0c4eb86c935983ad45 3 | size 1415072 4 | -------------------------------------------------------------------------------- /Eagle/assets/nvidia.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:77e41558cd89d3854d685b7f61e13d11ccf6564bab86655eaa278823f86f0a0e 3 | size 790457 4 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_test 3 | test_split: test 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testA 3 | test_split: testA 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_seg 2 | task: refcoco_seg_testB 3 | test_split: testB 4 | include: _default_template_seg_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_val 3 | include: _default_template_bbox_yaml 4 | test_split: val 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_seg_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_seg 2 | task: refcocog_seg_test 3 | include: _default_template_seg_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /Eagle/assets/eagle-logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f21888d107fd09b1b09b8bcfaf36d5ed38230b21ca40ff32f7e4d349b2d30333 3 | size 534690 4 | -------------------------------------------------------------------------------- /Eagle/assets/fig-teaser.jpg: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0b1cf45b98f41b4ec65d0da7b0b913ea9566f0da7d06966a5d1e6147f60759a9 3 | size 483062 4 | -------------------------------------------------------------------------------- /Eagle/assets/georgia-tech.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e35f5652aa4010d4c00547805deaab68a0ea7e1d8957503d4e64c19e2fcfdde4 3 | size 315669 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/AV1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3bd72cc5f0f3480dc18fcc5d9f7cf4bfa73ebac3281380fe2858c898944d9ba3 3 | size 898361 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/AV2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c21c5562ffe207fbd37a3efc7ba40eae4e4584a29a188d5095b0df649691f4ed 3 | size 961106 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8738c6297fa86cf857e2199404df506c20a5ab70bce3c18f220e8a28c1565656 3 | size 156814 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dd8b8ba8bbf5d5f179b12c0f9dd3a73c967e2e33ecf070df6905667febd30e26 3 | size 128759 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/Doc3.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:58ec04202ccdb3483286aaa08fa230242e95a30baac4e7c45888b26bb6330dc4 3 | size 224869 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dc33fe4a96a9993b057d99fb096e958ef02dd8cd1ebd1827ce2d81a6fdf770bb 3 | size 820803 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA2.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ca807eef10fa03e58a8758e7abe32cbc1440fc7f38f3dbbae5474a894c54d50c 3 | size 1250156 4 | -------------------------------------------------------------------------------- /Eagle/assets/visual/VQA3.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1203f3d32776a43ab38e6408aeaa3ebd8422205a6994b1c03174fa0a1a8d88ac 3 | size 228453 4 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: 
refcoco+_bbox_testA 3 | include: _default_template_bbox_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_bbox 2 | task: refcoco+_bbox_testB 3 | include: _default_template_bbox_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testA 3 | include: _default_template_seg_yaml 4 | test_split: testA 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+_seg 2 | task: refcoco+_seg_testB 3 | include: _default_template_seg_yaml 4 | test_split: testB 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testA 3 | test_split: testA 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco_bbox 2 | task: refcoco_bbox_testB 3 | test_split: testB 4 | include: _default_template_bbox_yaml 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog 2 | task: 3 | - refcocog_seg_test 4 | - refcocog_seg_val 5 | - refcocog_bbox_test 6 | - refcocog_bbox_val 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml: -------------------------------------------------------------------------------- 1 | group: refcocog_bbox 2 | task: refcocog_bbox_test 3 | include: _default_template_bbox_yaml 4 | test_split: test 5 | -------------------------------------------------------------------------------- /Eagle/assets/animal-compare.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fa869d21fbdc6c252eac439a4be0c1716d2b7a0d79a1f5705979e85b7ea9328c 3 | size 926281 4 | -------------------------------------------------------------------------------- /Eagle/assets/health-insurance.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a78b5d02c6936911850eb8410c0c6eb413cf3bc108b58d762d011855766a97af 3 | size 81714 4 | -------------------------------------------------------------------------------- /Eagle/assets/leasing-apartment.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:28011d0ed0f7c291a5f44b9756a591abe019004678238db99295dbcdbfcbbbe2 3 | size 193012 4 | 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco_cap.yaml: -------------------------------------------------------------------------------- 1 | group : coco_cap 2 | task: 3 | - coco2014_cap_val 4 | - coco2014_cap_test 5 | - coco2017_cap_val 6 | - coco2017_cap_test 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/_default_template_nocaps_yaml: -------------------------------------------------------------------------------- 1 | model_specific_prompt_kwargs: 2 | default: 3 | prompt: "Provide a one-sentence caption for the provided image." -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/_default_template_textcaps_yaml: -------------------------------------------------------------------------------- 1 | model_specific_prompt_kwargs: 2 | default: 3 | prompt: Provide a one-sentence caption for the provided image. -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench.yaml: -------------------------------------------------------------------------------- 1 | group: olympiadbench 2 | task: 3 | - olympiadbench_test_en 4 | - olympiadbench_test_cn 5 | metadata: 6 | - version: 0.0 7 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/gallery/prod_1.jpeg: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a1cce0deb560d17dd8fc55e8f6f7ca3bf8ed4ff558ca98b86616a8b32e855b73 3 | size 48925 4 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/static/SimHei.ttf: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:336a838f4a78e150826be608dae69de59d50948c3d2b71760e096ae764154bdc 3 | size 9751960 4 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | primaryColor="#F63366" 3 | backgroundColor="#FFFFFF" 4 | secondaryBackgroundColor="#F0F2F6" 5 | textColor="#262730" 6 | font="sans serif" 7 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco+ 2 | task: 3 | - refcoco+_seg_val 4 | - refcoco+_seg_testA 5 | - refcoco+_seg_testB 6 | - refcoco+_bbox_val 7 | - refcoco+_bbox_testA 8 | - refcoco+_bbox_testB 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_val" 2 | test_split: validation 3 | metric_list: 4 | - metric: anls 5 | aggregation: mean 6 | higher_is_better: true 7 | include: _default_template_docvqa_yaml 8 | -------------------------------------------------------------------------------- /Eagle2_5/document/4.streamlit_demo.md: -------------------------------------------------------------------------------- 1 | 2 | # Streamlit demo 3 | 4 | We provide a Streamlit demo: 5 | 6 | ``` 7 | cd Eagle/Eagle2_5/streamlit_demo 8 | bash
start_demo.sh 9 | # open demo http://localhost:8501 10 | ``` 11 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/start_demo.sh: -------------------------------------------------------------------------------- 1 | python controller.py & 2 | python model_worker.py --model-path nvidia/Eagle2.5-8B \ 3 | --model-name Eagle2_5 --port 6214 --worker-address http://127.0.0.1:6214 & 4 | streamlit run app.py 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cn.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_cn 2 | task: 3 | - mmbench_cn_dev 4 | - mmbench_cn_test 5 | - mmbench_cn_cc 6 | metadata: 7 | version: 0.0 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | sys_prompt: "有如下几个选项:" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench_en 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | metadata: 6 | version: 0.0 7 | sys_prompt: "There are several options:" 8 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 9 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/model/eagle2_5/__init__.py: -------------------------------------------------------------------------------- 1 | from .configuration_eagle2_5_vl import Eagle2_5_VLConfig 2 | from .modeling_eagle2_5_vl import Eagle2_5_VLForConditionalGeneration 3 | 4 | __all__ = ['Eagle2_5_VLConfig', 'Eagle2_5_VLForConditionalGeneration'] 5 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_val" 2 | test_split: validation 3 | output_type: generate_until 4 | metric_list: 5 | - metric: anls 6 | aggregation: mean 7 | higher_is_better: true 8 | include: _default_template_infovqa_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_refcoco.yaml: -------------------------------------------------------------------------------- 1 | group: refcoco 2 | task: 3 | - refcoco_seg_test 4 | - refcoco_seg_val 5 | - refcoco_seg_testA 6 | - refcoco_seg_testB 7 | - refcoco_bbox_test 8 | - refcoco_bbox_val 9 | - refcoco_bbox_testA 10 | - refcoco_bbox_testB 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/textvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function utils.textvqa_aggreate_submissions 6 | higher_is_better: true 7 | include: _default_template_textvqa_yaml 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.jpg filter=lfs diff=lfs merge=lfs -text 2 | *.jpeg filter=lfs diff=lfs merge=lfs -text 3 | *.png filter=lfs diff=lfs merge=lfs -text 4 | *.ttf filter=lfs diff=lfs merge=lfs -text 5 | *.pt filter=lfs diff=lfs merge=lfs -text 6 | *.bin filter=lfs diff=lfs merge=lfs -text 7 | -------------------------------------------------------------------------------- 
/Eagle/lmms_eval/tasks/mmbench/mmbench_cn_test.yaml: -------------------------------------------------------------------------------- 1 | task: mmbench_cn_test 2 | test_split: test 3 | metric_list: 4 | - metric: submission 5 | aggregation: !function cn_utils.mmbench_aggregate_test_results 6 | higher_is_better: true 7 | include: _default_template_mmbench_cn_yaml 8 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en_test.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_test" 2 | test_split: test 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function en_utils.mmbench_aggregate_test_results 7 | higher_is_better: true 8 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/_task_utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def generate_submission_file(file_name, args, subpath="submissions"): 5 | path = os.path.join(args.output_path, subpath) 6 | os.makedirs(path, exist_ok=True) 7 | path = os.path.join(path, file_name) 8 | return os.path.abspath(path) 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench.yaml: -------------------------------------------------------------------------------- 1 | group: mmbench 2 | task: 3 | - mmbench_en_dev 4 | - mmbench_en_test 5 | - mmbench_cn_dev 6 | - mmbench_cn_test 7 | - mmbench_cn_cc 8 | metadata: 9 | version: 0.0 10 | sys_prompt: "There are several options:" 11 | gpt_eval_model_name: "gpt-3.5-turbo-0613" -------------------------------------------------------------------------------- /Eagle/train_mem.py: -------------------------------------------------------------------------------- 1 | try: 2 | import transformer_engine 3 | import transformer_engine_extensions 4 | except: 5 | print("having trouble importing transformer-engine!") 6 | 7 | from train import train 8 | 9 | if __name__ == "__main__": 10 | train(attn_implementation="flash_attention_2") 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/_default_template_cmmmu_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/CMMMU 2 | output_type: generate_until 3 | doc_to_visual: !function utils.cmmmu_doc_to_visual 4 | doc_to_text: !function utils.cmmmu_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista.yaml: -------------------------------------------------------------------------------- 1 | group: mathvista 2 | task: 3 | - mathvista_testmini 4 | - mathvista_test 5 | metadata: 6 | version: 0.0 7 | # gpt_eval_model_name: "gpt-4-0613" 8 | # gpt_eval_model_name: "gpt-4o-2024-05-13" 9 | gpt_eval_model_name: "gpt-4-0125-preview" 10 | quick_extract: false -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/vqav2_test.yaml: -------------------------------------------------------------------------------- 1 | task: "vqav2_test" 2 | include: _default_template_vqav2_yaml 3 | test_split: test 4 | metric_list: 5 | - metric: submission 6 | aggregation: 
!function utils.vqav2_aggreate_submissions 7 | higher_is_better: true 8 | process_results: !function utils.vqav2_process_results_test 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/docvqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "docvqa_test" 2 | test_split: test 3 | process_results: !function utils.docvqa_test_process_results 4 | metric_list: 5 | - metric: submission 6 | aggregation: !function utils.docvqa_test_aggregate_results 7 | higher_is_better: true 8 | include: _default_template_docvqa_yaml 9 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .globals import get_pg_manager, set_pg_manager 2 | from .input_utils import split_for_sequence_parallel, gather_from_sequence_parallel, ring_split_for_sequence_parallel, ring_gather_for_sequence_parallel 3 | from .attention import pre_process_for_sequence_parallel_attn, post_process_for_sequence_parallel_attn -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/vqav2_val.yaml: -------------------------------------------------------------------------------- 1 | task: "vqav2_val" 2 | include: _default_template_vqav2_yaml 3 | test_split: validation 4 | metric_list: 5 | - metric: exact_match 6 | aggregation: mean 7 | higher_is_better: true 8 | ignore_case: true 9 | ignore_punctuation: true 10 | process_results: !function utils.vqav2_process_results_val 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/infovqa_test.yaml: -------------------------------------------------------------------------------- 1 | task: "infovqa_test" 2 | test_split: test 3 | output_type: generate_until 4 | process_results: !function utils.infovqa_test_process_results 5 | metric_list: 6 | - metric: submission 7 | aggregation: !function utils.infovqa_test_aggregate_results 8 | higher_is_better: true 9 | include: _default_template_infovqa_yaml 10 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/models/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | AVAILABLE_MODELS = { 4 | "eagle": "Eagle", 5 | } 6 | 7 | for model_name, model_class in AVAILABLE_MODELS.items(): 8 | try: 9 | exec(f"from .{model_name} import {model_class}") 10 | except ImportError: 11 | pass 12 | 13 | 14 | import hf_transfer 15 | 16 | os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" 17 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cn_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_cn_dev" 2 | test_split: "dev" 3 | metric_list: 4 | - metric: gpt_eval_score 5 | aggregation: !function cn_utils.mmbench_aggregate_dev_results_eval 6 | higher_is_better: true 7 | - metric: submission 8 | higher_is_better: true 9 | aggregation: !function cn_utils.mmbench_aggregate_dev_results 10 | include: _default_template_mmbench_cn_yaml 11 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/textvqa_val.yaml: -------------------------------------------------------------------------------- 1 | task: textvqa_val 2 | 
test_split: validation 3 | metric_list: 4 | - metric: exact_match 5 | aggregation: mean 6 | higher_is_better: true 7 | ignore_case: true 8 | ignore_punctuation: true 9 | - metric: submission 10 | aggregation: !function utils.textvqa_aggreate_submissions 11 | higher_is_better: true 12 | include: _default_template_textvqa_yaml 13 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_en_dev.yaml: -------------------------------------------------------------------------------- 1 | task: "mmbench_en_dev" 2 | test_split: dev 3 | include: _default_template_mmbench_en_yaml 4 | metric_list: 5 | - metric: gpt_eval_score 6 | aggregation: !function en_utils.mmbench_aggregate_dev_results_eval 7 | higher_is_better: true 8 | - metric: submission 9 | aggregation: !function en_utils.mmbench_aggregate_dev_results_submission 10 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/lasting_demo.sh: -------------------------------------------------------------------------------- 1 | 2 | submit_job --gpu 8 --tasks_per_node 1 --nodes 1 -n experiment --image /home/zhidingy/workspace/eagle2/torch2_test.sqsh \ 3 | --logroot workdir_lasting_demo_short \ 4 | --email_mode never \ 5 | --partition adlr_services \ 6 | --duration 0 \ 7 | --dependent_clones 0 \ 8 | -c "cd /home/zhidingy/workspace/eagle-video/streamlit_demo; bash start_demo.sh" 9 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: vizwiz_vqa_val 3 | test_split: val 4 | include: _default_template_vqa_yaml 5 | metric_list: 6 | - metric: exact_match 7 | aggregation: mean 8 | higher_is_better: true 9 | ignore_case: true 10 | ignore_punctuation: true 11 | # - metric: submission 12 | # aggregation: !function utils.vizwiz_vqa_aggreate_submissions 13 | # higher_is_better: true -------------------------------------------------------------------------------- /Eagle/eagle/constants.py: -------------------------------------------------------------------------------- 1 | # This file is from https://github.com/haotian-liu/LLaVA/ 2 | 3 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 4 | WORKER_HEART_BEAT_INTERVAL = 15 5 | 6 | LOGDIR = "." 
7 | 8 | # Model Constants 9 | IGNORE_INDEX = -100 10 | IMAGE_TOKEN_INDEX = -200 11 | DEFAULT_IMAGE_TOKEN = "" 12 | DEFAULT_IMAGE_PATCH_TOKEN = "" 13 | DEFAULT_IM_START_TOKEN = "" 14 | DEFAULT_IM_END_TOKEN = "" 15 | IMAGE_PLACEHOLDER = "" 16 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_mmvet_for_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--src", type=str) 7 | parser.add_argument("--dst", type=str) 8 | args = parser.parse_args() 9 | 10 | cur_result = {} 11 | 12 | for line in open(args.src): 13 | data = json.loads(line) 14 | qid = data['question_id'] 15 | cur_result[f'v1_{qid}'] = data['text'] 16 | 17 | with open(args.dst, 'w') as f: 18 | json.dump(cur_result, f, indent=2) 19 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-vizwiz-vqav2.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks vizwiz_vqa_test,vqav2_test \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_vizwiz_vqav2 \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vqav2/_default_template_vqav2_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/VQAv2 2 | dataset_kwargs: 3 | token: True 4 | output_type: generate_until 5 | doc_to_visual: !function utils.vqav2_doc_to_visual 6 | doc_to_text: !function utils.vqav2_doc_to_text 7 | doc_to_target: "answer" 8 | generation_kwargs: 9 | max_new_tokens: 16 10 | metadata: 11 | - version: 0.0 12 | model_specific_prompt_kwargs: 13 | default: 14 | pre_prompt: "" 15 | post_prompt: "\nAnswer the question using a single word or phrase." 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml: -------------------------------------------------------------------------------- 1 | group: vizwiz_vqa 2 | task: vizwiz_vqa_test 3 | test_split: test 4 | include: _default_template_vqa_yaml 5 | process_results: !function utils.vizwiz_vqa_process_results 6 | metric_list: 7 | # - metric: exact_match 8 | # aggregation: mean 9 | # higher_is_better: true 10 | # ignore_case: true 11 | # ignore_punctuation: true 12 | - metric: submission 13 | aggregation: !function utils.vizwiz_vqa_aggreate_submissions 14 | higher_is_better: true 15 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-mmbench-mathvista.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks mmbench_en_dev,mathvista_testmini \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_mmbench_mathvista \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/_default_template_infovqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/DocVQA 2 | dataset_name: InfographicVQA 3 | dataset_kwargs: 4 | token: True 5 | doc_to_target: "answers" 6 | doc_to_visual: !function utils.infovqa_doc_to_visual 7 | doc_to_text: !function utils.infovqa_doc_to_text 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | model_specific_prompt_kwargs: 13 | default: 14 | pre_prompt: "" 15 | post_prompt: "\nAnswer the question using a single word or phrase." 
-------------------------------------------------------------------------------- /Eagle/scripts/convert_gqa_for_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--src", type=str) 7 | parser.add_argument("--dst", type=str) 8 | args = parser.parse_args() 9 | 10 | all_answers = [] 11 | for line_idx, line in enumerate(open(args.src)): 12 | res = json.loads(line) 13 | question_id = res['question_id'] 14 | text = res['text'].rstrip('.').lower() 15 | all_answers.append({"questionId": question_id, "prediction": text}) 16 | 17 | with open(args.dst, 'w') as f: 18 | json.dump(all_answers, f) 19 | -------------------------------------------------------------------------------- /Eagle/scripts/eval_lmms_eval/eval-mme-seed-pope-sqa-gqa-ocrbench-textvqa-chartqa.sh: -------------------------------------------------------------------------------- 1 | MODEL_PATH=$1 2 | MODEL_NAME=$2 3 | CONV_MODE=$3 4 | 5 | accelerate launch --num_processes=8\ 6 | evaluate_lmms_eval.py \ 7 | --model eagle \ 8 | --model_args pretrained=${MODEL_PATH},conv_template=${CONV_MODE} \ 9 | --tasks mme,seed_bench,pope,scienceqa_img,gqa,ocrbench,textvqa_val,chartqa \ 10 | --batch_size 1 \ 11 | --log_samples \ 12 | --log_samples_suffix ${MODEL_NAME}_mmbench_mathvista_seedbench \ 13 | --output_path ./logs/ -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/cmmmu_test.yaml: -------------------------------------------------------------------------------- 1 | task: "cmmmu_test" 2 | test_split: test 3 | # The return value of process_results will be used by metrics 4 | process_results: !function utils.cmmmu_process_test_results_for_submission 5 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 6 | metric_list: 7 | - metric: submission 8 | aggregation: !function utils.cmmmu_test_aggregate_results_for_submission 9 | higher_is_better: false 10 | metadata: 11 | - version: 0.0 12 | include: _default_template_cmmmu_yaml 13 | -------------------------------------------------------------------------------- /Eagle/scripts/eval/mmmu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export CUDA_VISIBLE_DEVICES=0 3 | MODEL_CKPT=$1 4 | MODEL_NAME=$2 5 | 6 | SAVE_DIR=playground/data/eval/mmmu/${MODEL_NAME} 7 | SPLIT=validation 8 | MMMU_DATA_ROOT=./playground/data/eval/MMMU 9 | 10 | python eagle/eval/model_vqa_mmmu.py \ 11 | --model_path ${MODEL_CKPT} \ 12 | --split ${SPLIT} \ 13 | --output_path ${SAVE_DIR}/${SPLIT}_output.json \ 14 | 15 | output_file=${SAVE_DIR}/${SPLIT}_output.json 16 | echo "saving model answer at $output_file" 17 | 18 | python ./eval_utils/mmmu/main_eval_only.py --output_path ${SAVE_DIR}/${SPLIT}_output.json -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/constants.py: -------------------------------------------------------------------------------- 1 | # This file is adopted from the InternVL project 2 | # (https://github.com/OpenGVLab/InternVL), licensed under the MIT License. 
3 | # 4 | # -------------------------------------------------------- 5 | # InternVL 6 | # Copyright (c) 2023 OpenGVLab 7 | # Licensed under The MIT License 8 | # -------------------------------------------------------- 9 | 10 | CONTROLLER_HEART_BEAT_EXPIRATION = 30 11 | WORKER_HEART_BEAT_INTERVAL = 15 12 | 13 | LOGDIR = 'logs/' 14 | 15 | server_error_msg = '**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**' 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__ 3 | *.pyc 4 | *.egg-info 5 | dist 6 | 7 | # Log 8 | *.log 9 | *.log.* 10 | logs/ 11 | # *.json 12 | *.jsonl 13 | images/* 14 | 15 | # Data 16 | !**/alpaca-data-conversation.json 17 | 18 | # Editor 19 | .idea 20 | *.swp 21 | .github 22 | .vscode 23 | 24 | # Other 25 | .DS_Store 26 | wandb 27 | output 28 | examples/* 29 | 30 | eagle/eval/table/*.json 31 | eagle/eval/table/results/*.json 32 | checkpoints 33 | ckpts* 34 | slurm_logs/* 35 | 36 | datasets/* 37 | playground/* 38 | gradio_tmp/* 39 | 40 | .ipynb_checkpoints 41 | *.ipynb 42 | 43 | unit_test*.* 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/_default_template_textvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/textvqa 2 | output_type: generate_until 3 | doc_to_visual: !function utils.textvqa_doc_to_visual 4 | doc_to_text: !function utils.textvqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.textvqa_process_results 10 | model_specific_prompt_kwargs: 11 | default: 12 | pre_prompt: "" 13 | post_prompt: "\nAnswer the question using a single word or phrase." 14 | ocr: true 15 | qwen_vl: 16 | pre_prompt: "" 17 | post_prompt: " Answer:" 18 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/seedbench_ppl.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench_ppl" 5 | test_split: test 6 | output_type: multiple_choice 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text_mc 9 | doc_to_choice : !function utils.seed_doc_to_choice 10 | doc_to_target: !function utils.seed_doc_to_mc_target 11 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 12 | metric_list: 13 | - metric: acc 14 | metadata: 15 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/_default_template_docvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/DocVQA 2 | dataset_name: DocVQA 3 | dataset_kwargs: 4 | token: True 5 | output_type: generate_until 6 | doc_to_visual: !function utils.docvqa_doc_to_visual 7 | doc_to_text: !function utils.docvqa_doc_to_text 8 | doc_to_target: "answers" 9 | generation_kwargs: 10 | max_new_tokens: 32 11 | temperature: 0 12 | do_sample: False 13 | model_specific_prompt_kwargs: 14 | default: 15 | pre_prompt: "" 16 | post_prompt: "\nAnswer the question using a single word or phrase." 
17 | qwen_vl: 18 | pre_prompt: "" 19 | post_prompt: " Answer:" 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/cmmmu/cmmmu_val.yaml: -------------------------------------------------------------------------------- 1 | task: "cmmmu_val" 2 | test_split: val 3 | # The return value of process_results will be used by metrics 4 | process_results: !function utils.cmmmu_process_results 5 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 6 | generation_kwargs: 7 | max_new_tokens: 16 8 | image_aspect_ratio: original 9 | metric_list: 10 | - metric: cmmmu_acc 11 | aggregation: !function utils.cmmmu_aggregate_results 12 | higher_is_better: true 13 | metadata: 14 | - version: 0.0 15 | include: _default_template_cmmmu_yaml 16 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_default_template_vqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/VizWiz-VQA 2 | output_type: generate_until 3 | doc_to_visual: !function utils.vizwiz_vqa_doc_to_visual 4 | doc_to_text: !function utils.vizwiz_vqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | metadata: 10 | - version: 0.0 11 | model_specific_prompt_kwargs: 12 | default: 13 | pre_prompt: "" 14 | post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase." 15 | process_results: !function utils.vizwiz_vqa_process_results 16 | -------------------------------------------------------------------------------- /Eagle/scripts/eval/pope.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | python -m eagle.eval.model_vqa_loader \ 6 | --model-path $CKPT \ 7 | --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ 8 | --image-folder ./playground/data/eval/pope/val2014 \ 9 | --answers-file ./playground/data/eval/pope/answers/${NAME}.jsonl \ 10 | --temperature 0 \ 11 | --conv-mode vicuna_v1 12 | 13 | python eagle/eval/eval_pope.py \ 14 | --annotation-dir ./playground/data/eval/pope/coco \ 15 | --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ 16 | --result-file ./playground/data/eval/pope/answers/${NAME}.jsonl 17 | -------------------------------------------------------------------------------- /Eagle/scripts/zero2.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 2, 18 | "overlap_comm": false, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto" 22 | } 23 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/stvqa/stvqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ST-VQA 2 | task: "stvqa" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function 
utils.stvqa_doc_to_visual 6 | doc_to_text: !function utils.stvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.stvqa_process_results 13 | metric_list: 14 | - metric: submission 15 | aggregation: !function utils.stvqa_aggregate_submissions 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\nAnswer the question using a single word or phrase." 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ocrbench/ocrbench.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: echo840/OCRBench 2 | dataset_kwargs: 3 | token: True 4 | task: "ocrbench" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ocrbench_doc_to_visual 8 | doc_to_text: !function utils.ocrbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 128 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.ocrbench_process_results 17 | metric_list: 18 | - metric: ocrbench_accuracy 19 | aggregation: !function utils.ocrbench_aggregate_accuracy 20 | higher_is_better: true 21 | metadata: 22 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/textvqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | DATA_ROOT=$(readlink -f "./playground/data/eval/textvqa/") 5 | 6 | python -m eagle.eval.model_vqa_loader \ 7 | --model-path $CKPT \ 8 | --question-file ./playground/data/eval/textvqa/llava_textvqa_val_v051_ocr.jsonl \ 9 | --image-folder ./playground/data/eval/textvqa/train_images \ 10 | --answers-file ./playground/data/eval/textvqa/answers/${NAME}.jsonl \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | python -m eagle.eval.eval_textvqa \ 15 | --annotation-file ./playground/data/eval/textvqa/TextVQA_0.5.1_val.json \ 16 | --result-file ./playground/data/eval/textvqa/answers/${NAME}.jsonl 17 | -------------------------------------------------------------------------------- /Eagle/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="eagle", 5 | version="0.0.1", 6 | packages=find_packages(include=["eagle", "eagle.*"]), 7 | package_data={ 8 | "eagle": ["*"], 9 | "eagle.model": ["*"], 10 | "eagle.model.language_model": ["*"], 11 | }, 12 | py_modules=[ 13 | "eagle.conversation", 14 | "eagle.constants", 15 | "eagle.model.builder", 16 | "eagle.model.language_model", 17 | "eagle.utils", 18 | "eagle.mm_utils", 19 | ], 20 | install_requires=[ 21 | # Add any dependencies required by the eagle module 22 | ], 23 | include_package_data=True, 24 | ) 25 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/_default_template_mmbench_cn_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_kwargs: 3 | token: True 4 | doc_to_target: "answer" 5 | dataset_name: "cn" 6 | output_type: generate_until 7 | doc_to_visual: !function cn_utils.mmbench_doc_to_visual 8 | doc_to_text: !function cn_utils.mmbench_doc_to_text 9 | generation_kwargs: 10 | max_new_tokens: 256 11 | temperature: 0 12 | 
top_p: 0 13 | num_beams: 1 14 | do_sample: false 15 | process_results: !function cn_utils.mmbench_process_results 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\n请直接使用所提供的选项字母作为答案回答。" 20 | model_specific_generation_kwargs: 21 | llava: 22 | image_aspect_ratio: original 23 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/patch/__init__.py: -------------------------------------------------------------------------------- 1 | from .pad_data_collator import pad_data_collator, get_collator 2 | from .train_sampler_patch import replace_train_sampler, replace_train_sampler_for_online_packing, OnlinePackingGroupedSampler 3 | from .fused_monkey_patch import replace_liger_fused_ops 4 | from .train_sampler_patch import Packer 5 | from .packing_attention import patch_packing_attention 6 | __all__ = ['replace_llama_attn_with_flash_attn', 7 | 'replace_llama2_attn_with_flash_attn', 8 | 'replace_train_sampler', 9 | 'replace_train_sampler_for_online_packing', 10 | 'OnlinePackingGroupedSampler', 11 | 'pad_data_collator', 12 | 'get_collator', 13 | 'replace_liger_fused_ops'] -------------------------------------------------------------------------------- /Eagle/scripts/eval/vizwiz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | DATA_ROOT=$(readlink -f "./playground/data/eval/vizwiz") 5 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/vizwiz" 6 | 7 | python -m eagle.eval.model_vqa_loader \ 8 | --model-path $CKPT \ 9 | --question-file $DATA_ROOT/llava_test.jsonl \ 10 | --image-folder $DATA_ROOT/test \ 11 | --answers-file $LOCAL_ANSWER_DIR/$NAME/$NAME.jsonl \ 12 | --temperature 0 \ 13 | --conv-mode vicuna_v1 14 | 15 | python scripts/convert_vizwiz_for_submission.py \ 16 | --annotation-file $DATA_ROOT/llava_test.jsonl \ 17 | --result-file $LOCAL_ANSWER_DIR/$NAME/$NAME.jsonl \ 18 | --result-upload-file $LOCAL_ANSWER_DIR/$NAME/answers_upload/vizwiz_test_$NAME.json 19 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/decontamination.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import Filter 2 | 3 | 4 | class DecontaminationFilter(Filter): 5 | """ 6 | A filter which evaluates 7 | """ 8 | 9 | name = "track_decontamination" 10 | 11 | def __init__(self, path) -> None: 12 | """ 13 | 14 | TODO: make sure only ever run one time on the train set (should this be cached as a class var? keyed by value for "path"). 
15 | should further cache result on a given (task_name, doc_id) 16 | """ 17 | self._decontam_results = None 18 | 19 | def apply(self, resps, docs) -> None: 20 | """ 21 | Return {"no_contamination", "only_contamination"} keys for the 2 different subsets 22 | """ 23 | pass 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MP-DocVQA 2 | task: "multidocvqa_test" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual 6 | doc_to_text: !function utils.multidocvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.multidocvqa_process_test_results_for_submission 13 | metric_list: 14 | - metric: submission 15 | aggregation: !function utils.multidocvqa_test_aggregate_results_for_submission 16 | model_specific_prompt_kwargs: 17 | default: 18 | pre_prompt: "" 19 | post_prompt: "\nAnswer the question using a single word or phrase." 20 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMMU 2 | task: "mmmu_val" 3 | test_split: validation 4 | output_type: generate_until 5 | doc_to_visual: !function utils.mmmu_doc_to_visual 6 | doc_to_text: !function utils.mmmu_doc_to_text 7 | doc_to_target: "answer" 8 | # The return value of process_results will be used by metrics 9 | process_results: !function utils.mmmu_process_results 10 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | image_aspect_ratio: original 14 | metric_list: 15 | - metric: mmmu_acc 16 | aggregation: !function utils.mmmu_aggregate_results 17 | higher_is_better: true 18 | metadata: 19 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/forward_port.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Read the IP address from a file 5 | SERVER_IP=10.49.134.111 6 | # Validate the IP address format (optional) 7 | if [[ ! $SERVER_IP =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then 8 | echo "Error: Invalid IP address format: $SERVER_IP" 9 | exit 1 10 | fi 11 | 12 | # Target port and local listening port 13 | TARGET_PORT=8501 14 | LOCAL_PORT=9120 15 | 16 | # Print info 17 | echo "Setting up socat to forward local port $LOCAL_PORT to $SERVER_IP:$TARGET_PORT" 18 | 19 | # Start socat 20 | socat TCP-LISTEN:$LOCAL_PORT,fork TCP:$SERVER_IP:$TARGET_PORT 21 | # SOCAT_PID=$! 22 | 23 | # Print success message 24 | # echo "Socat is running with PID $SOCAT_PID. Forwarding $LOCAL_PORT to $SERVER_IP:$TARGET_PORT" 25 | 26 | # Optional: wait for the user to terminate manually 27 | # read -p "Press Enter to stop socat and exit..."
28 | 29 | # Optional: stop socat 30 | # kill $SOCAT_PID 31 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmmu/mmmu_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMMU 2 | task: "mmmu_test" 3 | test_split: test 4 | output_type: generate_until 5 | doc_to_visual: !function utils.mmmu_doc_to_visual 6 | doc_to_text: !function utils.mmmu_doc_to_text 7 | doc_to_target: "answer" 8 | # The return value of process_results will be used by metrics 9 | process_results: !function utils.mmmu_process_results 10 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | image_aspect_ratio: original 14 | metric_list: 15 | - metric: submission 16 | aggregation: !function utils.mmmu_test_aggregate_results_for_submission 17 | higher_is_better: true 18 | metadata: 19 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/sqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | python -m eagle.eval.model_vqa_science \ 6 | --model-path $CKPT \ 7 | --question-file ./playground/data/eval/scienceqa/llava_test_CQM-A.json \ 8 | --image-folder ./playground/data/eval/scienceqa/images/test \ 9 | --answers-file ./playground/data/eval/scienceqa/answers/${NAME}.jsonl \ 10 | --single-pred-prompt \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | python eagle/eval/eval_science_qa.py \ 15 | --base-dir ./playground/data/eval/scienceqa \ 16 | --result-file ./playground/data/eval/scienceqa/answers/${NAME}.jsonl \ 17 | --output-file ./playground/data/eval/scienceqa/answers/${NAME}_output.jsonl \ 18 | --output-result ./playground/data/eval/scienceqa/answers/${NAME}_result.json 19 | -------------------------------------------------------------------------------- /Eagle2_5/shell/submit_prepare_job.sh: -------------------------------------------------------------------------------- 1 | set -a 2 | source .env 3 | set +a 4 | 5 | RECIPE_PATH=${1:-"local_playground/recipe/stage1.json"} 6 | NODES=${2:-1} 7 | LOG_DIR=${3:-"work_dirs/data_prepare"} 8 | TOKENIZER=${4:-"Qwen/Qwen3-1.7B"} 9 | LAUNCHER=${5:-"pytorch"} 10 | 11 | 12 | bash shell/prepare.sh ${RECIPE_PATH} ${NODES} ${LOG_DIR} ${TOKENIZER} ${LAUNCHER} 13 | 14 | # submit_job \ 15 | # --image=${TRAINING_IMAGE_PATH} \ 16 | # --gpu 8 \ 17 | # --tasks_per_node 8 \ 18 | # --nodes ${NODES} \ 19 | # -n prepare_data \ 20 | # --logroot ${LOG_DIR} \ 21 | # --email_mode never \ 22 | # --duration 0 \ 23 | # --cpu 128 \ 24 | # --dependent_clones 3 \ 25 | # --partition adlr_services \ 26 | # -c "bash shell/eagle_abc/prepare.sh ${RECIPE_PATH} ${NODES} ${LOG_DIR} ${TOKENIZER} ${LAUNCHER}" 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/_default_template_mmbench_en_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_kwargs: 3 | token: True 4 | doc_to_target: "answer" 5 | model_specific_prompt_kwargs: 6 | default: 7 | pre_prompt: "" 8 | post_prompt: "\nAnswer with the option's letter from the given choices directly."
9 | doc_to_visual: !function en_utils.mmbench_doc_to_visual 10 | doc_to_text: !function en_utils.mmbench_doc_to_text 11 | doc_to_target: "answer" 12 | process_results: !function en_utils.mmbench_process_results 13 | model_specific_generation_kwargs: 14 | llava: 15 | image_aspect_ratio: original 16 | output_type: generate_until 17 | dataset_name: "en" 18 | generation_kwargs: 19 | until: 20 | - "ASSISTANT:" 21 | max_new_tokens: 1024 22 | temperature: 0 23 | top_p: 0 24 | num_beams: 1 25 | do_sample: false 26 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage1_config_nooptim.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 1, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e9, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e9, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "gradient_accumulation_steps": "auto", 24 | "gradient_clipping": "auto", 25 | "steps_per_print": 2000, 26 | "train_batch_size": "auto", 27 | "train_micro_batch_size_per_gpu": "auto", 28 | "wall_clock_breakdown": true 29 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench_test_cn.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OlympiadBench 2 | dataset_kwargs: 3 | token: True 4 | task : "olympiadbench_test_cn" 5 | test_split: test_cn 6 | output_type: generate_until 7 | doc_to_visual: !function cn_utils.olympiadbench_doc_to_visual 8 | doc_to_text: !function cn_utils.olympiadbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function cn_utils.olympiadbench_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function cn_utils.olympiadbench_aggregate_results 22 | higher_is_better: true 23 | - metric: exact_match 24 | aggregation: mean 25 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/olympiadbench_test_en.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OlympiadBench 2 | dataset_kwargs: 3 | token: True 4 | task : "olympiadbench_test_en" 5 | test_split: test_en 6 | output_type: generate_until 7 | doc_to_visual: !function en_utils.olympiadbench_doc_to_visual 8 | doc_to_text: !function en_utils.olympiadbench_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function en_utils.olympiadbench_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function en_utils.olympiadbench_aggregate_results 22 | higher_is_better: true 23 | - metric: exact_match 24 | aggregation: mean 25 | higher_is_better: true -------------------------------------------------------------------------------- 
/Eagle/lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption2017 2 | dataset_kwargs: 3 | token: True 4 | task : "coco2017_cap_test" 5 | group : "coco_caption2017" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: !function utils.coco_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 128 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_passthrough 21 | aggregation : !function utils.coco_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/multidocvqa/multidocvqa_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MP-DocVQA 2 | task: "multidocvqa_val" 3 | test_split: val 4 | output_type: generate_until 5 | doc_to_visual: !function utils.multidocvqa_doc_to_visual 6 | doc_to_text: !function utils.multidocvqa_doc_to_text 7 | doc_to_target: "answers" 8 | generation_kwargs: 9 | max_new_tokens: 32 10 | temperature: 0 11 | do_sample: False 12 | process_results: !function utils.multidocvqa_process_results 13 | metric_list: 14 | - metric: anls 15 | aggregation: !function utils.multidocvqa_aggregate_results_anls 16 | higher_is_better: true 17 | - metric: accuracy 18 | aggregation: !function utils.multidocvqa_aggregate_results_accuracy 19 | higher_is_better: true 20 | model_specific_prompt_kwargs: 21 | default: 22 | pre_prompt: "" 23 | post_prompt: "\nAnswer the question using a single word or phrase." 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/gqa/gqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/GQA 2 | dataset_name: testdev_balanced_instructions 3 | dataset_kwargs: 4 | token: True 5 | task: "gqa" 6 | test_split: testdev 7 | output_type: generate_until 8 | doc_to_visual: !function utils.gqa_doc_to_visual 9 | doc_to_text: !function utils.gqa_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | metric_list: 18 | - metric: exact_match 19 | aggregation: mean 20 | higher_is_better: true 21 | ignore_case: true 22 | ignore_punctuation: true 23 | metadata: 24 | - version: 0.0 25 | 26 | model_specific_prompt_kwargs: 27 | default: 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer the question using a single word or phrase." 
30 | qwen_vl: 31 | pre_prompt: "" 32 | post_prompt: " Answer:" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption 2 | dataset_kwargs: 3 | token: True 4 | task : "coco2014_cap_test" 5 | group : "coco_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: "Provide a one-sentence caption for the provided image." 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 128 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_passthrough 21 | aggregation : !function utils.coco_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/gqa/utils.py: -------------------------------------------------------------------------------- 1 | from datasets import load_dataset 2 | 3 | GQA_RAW_IMAGE_DATASET = None 4 | GQA_ID2IMAGE = None 5 | 6 | 7 | def gqa_doc_to_visual(doc): 8 | global GQA_RAW_IMAGE_DATASET 9 | global GQA_ID2IMAGE 10 | if GQA_RAW_IMAGE_DATASET is None: 11 | GQA_RAW_IMAGE_DATASET = load_dataset("lmms-lab/GQA", "testdev_balanced_images", split="testdev", token=True) 12 | GQA_ID2IMAGE = {} 13 | for row in GQA_RAW_IMAGE_DATASET: 14 | GQA_ID2IMAGE[row["id"]] = row["image"].convert("RGB") 15 | image = GQA_ID2IMAGE[doc["imageId"]] 16 | return [image] 17 | 18 | 19 | def gqa_doc_to_text(doc, model_specific_prompt_kwargs): 20 | question = doc["question"] 21 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 22 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 23 | return f"{pre_prompt}{question}{post_prompt}" 24 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_default_template_vqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/OK-VQA 2 | output_type: generate_until 3 | doc_to_visual: !function utils.ok_vqa_doc_to_visual 4 | doc_to_text: !function utils.ok_vqa_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | metric_list: 10 | - metric: exact_match 11 | aggregation: mean 12 | higher_is_better: true 13 | ignore_case: true 14 | ignore_punctuation: true 15 | - metric: submission 16 | aggregation: !function utils.ok_vqa_aggreate_submissions 17 | higher_is_better: true 18 | process_results: !function utils.ok_vqa_process_results 19 | model_specific_prompt_kwargs: 20 | default: 21 | pre_prompt: "" 22 | post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase." 
23 | metadata: 24 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/train/constants.py: -------------------------------------------------------------------------------- 1 | IMG_CONTEXT_TOKEN = '' 2 | IMG_START_TOKEN = '' 3 | IMG_END_TOKEN = '' 4 | QUAD_START_TOKEN = '' 5 | QUAD_END_TOKEN = '' 6 | REF_START_TOKEN = '' 7 | REF_END_TOKEN = '' 8 | BOX_START_TOKEN = '' 9 | BOX_END_TOKEN = '' 10 | INTERVAL_START_TOKEN = '' 11 | INTERVAL_END_TOKEN = '' 12 | IMAGENET_MEAN = (0.485, 0.456, 0.406) 13 | IMAGENET_STD = (0.229, 0.224, 0.225) 14 | CLIP_MEAN = (0.4814546, 0.4578275, 0.40821073) 15 | CLIP_STD = (0.2686295, 0.2613025, 0.2757711) 16 | SIGLIP_MEAN = (0.5, 0.5, 0.5) 17 | SIGLIP_STD = (0.5, 0.5, 0.5) 18 | 19 | special_tokens_list = [ 20 | IMG_CONTEXT_TOKEN, 21 | IMG_START_TOKEN, IMG_END_TOKEN, 22 | BOX_START_TOKEN, BOX_END_TOKEN, 23 | QUAD_START_TOKEN, QUAD_END_TOKEN, 24 | REF_START_TOKEN, REF_END_TOKEN, 25 | INTERVAL_START_TOKEN, INTERVAL_END_TOKEN, 26 | ] 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NoCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "nocaps_test" 5 | group : "nocaps_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.nocaps_doc_to_visual 9 | doc_to_text: !function utils.nocaps_doc_to_text 10 | doc_to_target: "annotations_captions" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.nocaps_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: nocaps_passthrough 21 | aggregation : !function utils.nocaps_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 25 | include: _default_template_nocaps_yaml -------------------------------------------------------------------------------- /Eagle/scripts/zero3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "train_micro_batch_size_per_gpu": "auto", 14 | "train_batch_size": "auto", 15 | "gradient_accumulation_steps": "auto", 16 | "zero_optimization": { 17 | "stage": 3, 18 | "overlap_comm": true, 19 | "contiguous_gradients": true, 20 | "sub_group_size": 1e9, 21 | "reduce_bucket_size": "auto", 22 | "stage3_prefetch_bucket_size": "auto", 23 | "stage3_param_persistence_threshold": "auto", 24 | "stage3_max_live_parameters": 1e9, 25 | "stage3_max_reuse_distance": 1e9, 26 | "stage3_gather_16bit_weights_on_model_save": true 27 | } 28 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: AI4Math/MathVista 2 | dataset_kwargs: 3 | token: True 4 | task: "mathvista_test" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mathvista_doc_to_visual 8 | doc_to_text: !function 
utils.mathvista_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mathvista_process_results 19 | metric_list: 20 | - metric: submission 21 | aggregation: !function utils.mathvista_aggregate_results 22 | higher_is_better: true 23 | 24 | model_specific_prompt_kwargs: 25 | default: 26 | shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step" 27 | model_specific_generation_kwargs: 28 | llava: 29 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "textcaps_test" 5 | group : "textcaps_caption" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.textcaps_test_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: textcaps_passthrough 21 | aggregation : !function utils.textcaps_test_aggregation_result 22 | higher_is_better : true 23 | metadata: 24 | - version: 0.0 25 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/scripts/eval/mme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | MME_DATA_ROOT=$(readlink -f "./playground/data/eval/MME") 5 | 6 | python -m eagle.eval.model_vqa_loader \ 7 | --model-path $CKPT \ 8 | --question-file ./playground/data/eval/MME/llava_mme.jsonl \ 9 | --image-folder ./playground/data/eval/MME/MME_Benchmark_release_version \ 10 | --answers-file ./playground/data/eval/MME/answers/${NAME}.jsonl \ 11 | --temperature 0 \ 12 | --conv-mode vicuna_v1 13 | 14 | cd ./playground/data/eval/MME 15 | 16 | # python convert_answer_to_mme.py --experiment ${NAME}.jsonl 17 | 18 | # cd eval_tool 19 | 20 | # python calculation.py --results_dir answers/${NAME} 21 | 22 | python convert_answer_to_mme.py --experiment ${MME_DATA_ROOT}/answers/${NAME}.jsonl --data_path ${MME_DATA_ROOT}/MME_Benchmark_release_version 23 | 24 | cd eval_tool 25 | 26 | python calculation.py --results_dir ${MME_DATA_ROOT}/answers/${NAME}_mme_results 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/_default_template_docvqa_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ICON-QA 2 | dataset_kwargs: 3 | token: True 4 | output_type: generate_until 5 | doc_to_visual: !function utils.doc_to_visual 6 | doc_to_text: !function utils.doc_to_text 7 | doc_to_target: "answers" 8 | # process_results: !function utils.test_process_results 9 | generation_kwargs: 10 | max_new_tokens: 32 11 | temperature: 0 12 | do_sample: False 13 | model_specific_prompt_kwargs: 14 | default: 15 | pre_prompt: "" 16 | statement: "Given a set of images and a 
question, please provide the answer to the question.\n" 17 | options_statement: "Question: {question}.\nOptions:\n{options}\nPlease answer with the option letter from the given choices directly." 18 | freeform_statement: "Question: {question}.\nPlease answer the question using a single word or phrase." 19 | metric_list: 20 | - metric: anls 21 | aggregation: mean 22 | higher_is_better: true -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/ring/__init__.py: -------------------------------------------------------------------------------- 1 | # Adopted from https://github.com/zhuzilin/ring-flash-attention. 2 | # Implementation refers to Ring Attention Paper: https://arxiv.org/abs/2310.01889 3 | 4 | 5 | from .ring_flash_attn import ring_flash_attn_func, ring_flash_attn_kvpacked_func, ring_flash_attn_qkvpacked_func 6 | from .ring_flash_attn_varlen import ( 7 | ring_flash_attn_varlen_func, 8 | ring_flash_attn_varlen_kvpacked_func, 9 | ring_flash_attn_varlen_qkvpacked_func, 10 | ) 11 | from .stripe_flash_attn import stripe_flash_attn_func, stripe_flash_attn_kvpacked_func, stripe_flash_attn_qkvpacked_func 12 | from .zigzag_ring_flash_attn import ( 13 | zigzag_ring_flash_attn_func, 14 | zigzag_ring_flash_attn_kvpacked_func, 15 | zigzag_ring_flash_attn_qkvpacked_func, 16 | ) 17 | from .zigzag_ring_flash_attn_varlen import ( 18 | zigzag_ring_flash_attn_varlen_func, 19 | zigzag_ring_flash_attn_varlen_qkvpacked_func, 20 | ) 21 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mathvista/mathvista_testmini.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: AI4Math/MathVista 2 | dataset_kwargs: 3 | token: True 4 | task: "mathvista_testmini" 5 | test_split: testmini 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mathvista_doc_to_visual 8 | doc_to_text: !function utils.mathvista_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mathvista_process_results 19 | metric_list: 20 | - metric: gpt_eval_score 21 | aggregation: !function utils.mathvista_aggregate_results 22 | higher_is_better: true 23 | 24 | model_specific_prompt_kwargs: 25 | default: 26 | shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step" 27 | model_specific_generation_kwargs: 28 | llava: 29 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | splits = ["val2014"] 5 | tasks = ["vqa"] 6 | 7 | if __name__ == "__main__": 8 | dump_tasks = [] 9 | for task in tasks: 10 | for split in splits: 11 | yaml_dict = {"group": f"ok_vqa", "task": f"ok_vqa_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 12 | if split == "train": 13 | yaml_dict.pop("group") 14 | else: 15 | dump_tasks.append(f"ok_vqa_{split}") 16 | 17 | save_path = f"./ok_vqa_{split}.yaml" 18 | print(f"Saving to {save_path}") 19 | with open(save_path, "w") as f: 20 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 21 | 22 | group_dict = {"group": "ok_vqa", "task": dump_tasks} 23 | 24 | with open("./_ok_vqa.yaml", "w") as f: 25 | 
yaml.dump(group_dict, f, default_flow_style=False, indent=4) 26 | -------------------------------------------------------------------------------- /Eagle/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.3.1 2 | torchvision 3 | transformers==4.44.2 4 | tokenizers==0.19.1 5 | sentencepiece==0.2.0 6 | shortuuid 7 | accelerate==0.34.2 8 | peft 9 | bitsandbytes 10 | pydantic 11 | markdown2[all] 12 | numpy 13 | scikit-learn==1.2.2 14 | gradio==4.16.0 15 | gradio_client==0.8.1 16 | requests 17 | httpx==0.27.0 18 | uvicorn 19 | protobuf==3.20.0 20 | fastapi 21 | einops==0.6.1 22 | einops-exts==0.0.4 23 | timm==0.9.11 24 | opencv-python 25 | fvcore 26 | # these lib are required for the lmms-eval 27 | black==24.1.0 28 | datasets==2.16.1 29 | evaluate>=0.4.0 30 | jsonlines 31 | numexpr 32 | pybind11>=2.6.2 33 | pytablewriter 34 | rouge-score>=0.0.4 35 | sacrebleu>=1.5.0 36 | scikit-learn>=0.24.1 37 | sqlitedict 38 | openai>=1.0.0 39 | pycocoevalcap 40 | tqdm-multiprocess 41 | zstandard 42 | pyyaml 43 | sympy 44 | mpmath 45 | Jinja2 46 | openpyxl 47 | Levenshtein 48 | hf_transfer 49 | tenacity 50 | transformers-stream-generator 51 | tiktoken 52 | pre-commit 53 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/stvqa/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | 8 | def stvqa_doc_to_text(doc, model_specific_prompt_kwargs): 9 | question = doc["question"] 10 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 11 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 12 | return f"{pre_prompt}{question}{post_prompt}" 13 | 14 | 15 | def stvqa_doc_to_visual(doc): 16 | return [doc["image"].convert("RGB")] 17 | 18 | 19 | def stvqa_process_results(doc, results): 20 | answer = results[0] 21 | return {"submission": {"question_id": int(doc["question_id"]), "answer": answer}} 22 | 23 | 24 | def stvqa_aggregate_submissions(results, args): 25 | file = generate_submission_file("stvqa_test_for_submission.json", args) 26 | with open(file, "w") as f: 27 | json.dump(results, f) 28 | logging.getLogger("lmms-eval").info(f"Results saved to {file}") 29 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | splits = ["val", "test"] 5 | tasks = ["vqa"] 6 | 7 | if __name__ == "__main__": 8 | dump_tasks = [] 9 | for task in tasks: 10 | for split in splits: 11 | yaml_dict = {"group": f"vizwiz_{task}", "task": f"vizwiz_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 12 | if split == "train": 13 | yaml_dict.pop("group") 14 | else: 15 | dump_tasks.append(f"vizwiz_{task}_{split}") 16 | 17 | save_path = f"./vizwiz_{task}_{split}.yaml" 18 | print(f"Saving to {save_path}") 19 | with open(save_path, "w") as f: 20 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 21 | 22 | group_dict = {"group": "vizwiz_vqa", "task": dump_tasks} 23 | 24 | with open("./_vizwiz_vqa.yaml", "w") as f: 25 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 26 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/chartqa/chartqa.yaml: 
-------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ChartQA 2 | dataset_kwargs: 3 | token: True 4 | task: "chartqa" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.chartqa_doc_to_visual 8 | doc_to_text: !function utils.chartqa_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | do_sample: False 14 | process_results: !function utils.chartqa_process_results 15 | metric_list: 16 | - metric: relaxed_overall 17 | aggregation: mean 18 | higher_is_better: true 19 | - metric: relaxed_human_split 20 | aggregation: mean 21 | higher_is_better: true 22 | - metric: relaxed_augmented_split 23 | aggregation: mean 24 | higher_is_better: true 25 | metadata: 26 | - version: 0.0 27 | model_specific_prompt_kwargs: 28 | default: 29 | pre_prompt: "" 30 | post_prompt: "\nAnswer the question with a single word." 31 | qwen_vl: 32 | pre_prompt: "" 33 | post_prompt: " Answer:" 34 | 35 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ai2d/ai2d.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ai2d 2 | task: "ai2d" 3 | dataset_kwargs: 4 | token: True 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ai2d_doc_to_visual 8 | doc_to_text: !function utils.ai2d_doc_to_text 9 | doc_to_target: !function utils.ai2d_doc_to_target 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | do_sample: False 14 | metric_list: 15 | - metric: exact_match 16 | aggregation: mean 17 | higher_is_better: true 18 | ignore_case: true 19 | ignore_punctuation: true 20 | metadata: 21 | - version: 0.0 22 | 23 | model_specific_prompt_kwargs: 24 | default: 25 | prompt_format: mcq 26 | pre_prompt: "" 27 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 
28 | # qwen formulate ai2d as question answering instead of mcq 29 | qwen_vl: 30 | prompt_format: qa 31 | pre_prompt: "" 32 | post_prompt: " Answer:" 33 | 34 | model_specific_target_kwargs: 35 | default: "mcq" 36 | qwen_vl: "qa" -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/instance.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Literal, Tuple 3 | 4 | 5 | @dataclass 6 | class Instance: 7 | request_type: Literal["loglikelihood", "generate_until"] 8 | arguments: tuple 9 | idx: int 10 | metadata: Tuple[str, int, int] = field(default_factory=lambda: (None, None, None)) # TODO: better typehints here 11 | resps: list = field(default_factory=list) 12 | filtered_resps: dict = field(default_factory=dict) 13 | 14 | # initialized after init 15 | task_name: str = None 16 | doc_id: str = None 17 | repeats: str = None 18 | doc: dict = None 19 | 20 | def __post_init__(self) -> None: 21 | # unpack metadata field 22 | self.task_name, self.doc_id, self.repeats = self.metadata 23 | 24 | @property 25 | def args(self): 26 | """ 27 | Returns (string,) where `string` is the string to calculate loglikelihood over 28 | """ 29 | return self.arguments if isinstance(self.arguments, tuple) else (self.arguments,) 30 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ScienceQA 2 | dataset_name: ScienceQA-FULL 3 | task: "scienceqa" 4 | dataset_kwargs: 5 | token: True 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.sqa_doc_to_visual 9 | doc_to_text: !function utils.sqa_doc_to_text 10 | doc_to_target: !function utils.sqa_doc_to_target 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | do_sample: False 15 | metric_list: 16 | - metric: exact_match 17 | aggregation: mean 18 | higher_is_better: true 19 | ignore_case: true 20 | ignore_punctuation: true 21 | process_results: !function utils.sqa_process_results 22 | metadata: 23 | - version: 0.0 24 | 25 | model_specific_prompt_kwargs: 26 | default: 27 | format: default 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 
30 | qwen_vl: 31 | format: qwen_vl 32 | 33 | model_specific_generation_kwargs: 34 | llava: 35 | image_aspect_ratio: original 36 | 37 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/scienceqa_img.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/ScienceQA 2 | dataset_name: ScienceQA-IMG 3 | task: "scienceqa_img" 4 | dataset_kwargs: 5 | token: True 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function utils.sqa_doc_to_visual 9 | doc_to_text: !function utils.sqa_doc_to_text 10 | doc_to_target: !function utils.sqa_doc_to_target 11 | generation_kwargs: 12 | max_new_tokens: 16 13 | temperature: 0 14 | do_sample: False 15 | metric_list: 16 | - metric: exact_match 17 | aggregation: mean 18 | higher_is_better: true 19 | ignore_case: true 20 | ignore_punctuation: true 21 | process_results: !function utils.sqa_process_results 22 | metadata: 23 | - version: 0.0 24 | 25 | model_specific_prompt_kwargs: 26 | default: 27 | format: default 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer with the option's letter from the given choices directly." 30 | qwen_vl: 31 | format: qwen_vl 32 | model_specific_generation_kwargs: 33 | llava: 34 | image_aspect_ratio: original 35 | 36 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "test", "val"] 5 | splits = ["test", "val"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcocog_{task}", "task": f"refcocog_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcocog_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcocog", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage2_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 2, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e8, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e8, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "optimizer": { 24 | "type": "AdamW", 25 | "params": { 26 | "lr": "auto", 27 | "betas": [ 28 | 0.9, 29 | 0.999 30 | ], 31 | "eps": 1e-8, 32 | "weight_decay": "auto" 33 | } 34 | }, 35 | "gradient_accumulation_steps": "auto", 36 | "gradient_clipping": "auto", 37 | "steps_per_print": 2000, 38 | "train_batch_size": "auto", 39 | "train_micro_batch_size_per_gpu": "auto", 40 | "wall_clock_breakdown": false 41 
| } 42 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/seedbench.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | # The return value of process_results will be used by metrics 15 | process_results: !function utils.seed_process_result 16 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 17 | metric_list: 18 | - metric: seed_image 19 | aggregation: !function utils.seed_aggregation_result 20 | higher_is_better: true 21 | - metric: seed_video 22 | aggregation: !function utils.seed_aggregation_result 23 | higher_is_better: true 24 | - metric: seed_all 25 | aggregation: !function utils.seed_aggregation_result 26 | higher_is_better: true 27 | metadata: 28 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "val", "testA", "testB"] 5 | splits = ["val", "testA", "testB"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcoco+_{task}", "task": f"refcoco+_{task}_{split}", "include": f"_default_template_{task}_yaml", "test_split": split} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcoco+_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcoco+", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_generate_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | # splits = ["train", "test", "val", "testA", "testB"] 5 | splits = ["test", "val", "testA", "testB"] 6 | tasks = ["seg", "bbox"] 7 | 8 | if __name__ == "__main__": 9 | dump_tasks = [] 10 | for task in tasks: 11 | for split in splits: 12 | yaml_dict = {"group": f"refcoco_{task}", "task": f"refcoco_{task}_{split}", "test_split": split, "include": f"_default_template_{task}_yaml"} 13 | if split == "train": 14 | yaml_dict.pop("group") 15 | else: 16 | dump_tasks.append(f"refcoco_{task}_{split}") 17 | 18 | save_path = f"./refcoco_{task}_{split}.yaml" 19 | print(f"Saving to {save_path}") 20 | with open(save_path, "w") as f: 21 | yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False) 22 | 23 | group_dict = {"group": "refcoco", "task": dump_tasks} 24 | 25 | with open("./_refcoco.yaml", "w") as f: 26 | yaml.dump(group_dict, f, default_flow_style=False, indent=4) 27 | 
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmbench/mmbench_cc.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMBench 2 | dataset_name: cc 3 | dataset_kwargs: 4 | token: True 5 | task: "mmbench_cn_cc" 6 | test_split: test 7 | output_type: generate_until 8 | doc_to_visual: !function cc_utils.mmbench_doc_to_visual 9 | doc_to_text: !function cc_utils.mmbench_cn_cc_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 256 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function cc_utils.mmbench_cn_cc_process_results 18 | metric_list: 19 | - metric: gpt_eval_score 20 | aggregation: !function cc_utils.mmbench_cn_cc_aggregate_dev_results_eval 21 | higher_is_better: true 22 | - metric: submission 23 | aggregation: !function cc_utils.mmbench_cn_cc_aggregate_results 24 | metadata: 25 | version: 0.0 26 | gpt_eval_model_name: "gpt-3.5-turbo-0613" 27 | 28 | model_specific_prompt_kwargs: 29 | default: 30 | pre_prompt: "" 31 | post_prompt: "\n请直接使用所提供的选项字母作为答案回答。" 32 | model_specific_generation_kwargs: 33 | llava: 34 | image_aspect_ratio: original -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mmvet/mmvet.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MMVet 2 | dataset_kwargs: 3 | token: True 4 | task: "mmvet" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mmvet_doc_to_visual 8 | doc_to_text: !function utils.doc_to_text # Such that {{question}} will be replaced by doc["question"] 9 | doc_to_target: "{{answer}}" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 1024 14 | temperature: 0 15 | top_p: 0 16 | num_beams: 1 17 | do_sample: false 18 | process_results: !function utils.mmvet_process_results # apply gpt eval here 19 | metric_list: 20 | - metric: gpt_eval_score 21 | aggregation: !function utils.mmvet_aggregate_results 22 | higher_is_better: true 23 | metadata: 24 | version: 0.0 25 | # gpt_eval_model_name: "gpt-4-0613" 26 | # gpt_eval_model_name: "gpt-4-0125-preview" 27 | # gpt_eval_model_name: "gpt-4o-2024-05-13" 28 | gpt_eval_model_name: "gpt-4" 29 | # gpt_eval_model_name: "gpt-3.5-turbo-0613" 30 | model_specific_prompt_kwargs: 31 | default: 32 | pre_prompt: "" 33 | post_prompt: "" 34 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/infovqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | 5 | 6 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 7 | 8 | lmms_logger = logging.getLogger("lmms-eval") 9 | 10 | 11 | def infovqa_doc_to_visual(doc): 12 | return [doc["image"].convert("RGB")] 13 | 14 | 15 | def infovqa_doc_to_text(doc, model_specific_prompt_kwargs): 16 | question = doc["question"] 17 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 18 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 19 | return f"{pre_prompt}{question}{post_prompt}" 20 | 21 | 22 | def infovqa_test_process_results(doc, results): 23 | pred = results[0] 24 | questionId = doc["questionId"] 25 | return {"submission": {"questionId": int(questionId), "answer": pred}} 26 | 27 | 28 | def infovqa_test_aggregate_results(results, args): 29 | # save 
results as json 30 | file = generate_submission_file("infovqa_test_for_submission.json", args) 31 | with open(file, "w") as f: 32 | json.dump(results, f) 33 | lmms_logger.info(f"Results saved to {file}") 34 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_mmbench_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | import pandas as pd 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--annotation-file", type=str, required=True) 9 | parser.add_argument("--result-dir", type=str, required=True) 10 | parser.add_argument("--upload-dir", type=str, required=True) 11 | parser.add_argument("--experiment", type=str, required=True) 12 | 13 | return parser.parse_args() 14 | 15 | if __name__ == "__main__": 16 | args = get_args() 17 | 18 | df = pd.read_table(args.annotation_file) 19 | 20 | cur_df = df.copy() 21 | cur_df = cur_df.drop(columns=['hint', 'category', 'source', 'image', 'comment', 'l2-category']) 22 | cur_df.insert(6, 'prediction', None) 23 | for pred in open(os.path.join(args.result_dir, f"{args.experiment}.jsonl")): 24 | pred = json.loads(pred) 25 | cur_df.loc[df['index'] == pred['question_id'], 'prediction'] = pred['text'] 26 | 27 | cur_df.to_excel(os.path.join(args.upload_dir, f"{args.experiment}.xlsx"), index=False, engine='openpyxl') 28 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/pope/pope.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/POPE 2 | dataset_kwargs: 3 | token: True 4 | task: "pope" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.pope_doc_to_visual 8 | doc_to_text: !function utils.pope_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 128 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.pope_process_results 17 | metric_list: 18 | - metric: pope_accuracy 19 | aggregation: !function utils.pope_aggregate_accuracy 20 | higher_is_better: true 21 | - metric: pope_precision 22 | aggregation: !function utils.pope_aggregate_precision 23 | higher_is_better: true 24 | - metric: pope_recall 25 | aggregation: !function utils.pope_aggregate_recall 26 | higher_is_better: true 27 | - metric: pope_f1_score 28 | aggregation: !function utils.pope_aggregate_f1_score 29 | higher_is_better: true 30 | - metric: pope_yes_ratio 31 | aggregation: !function utils.pope_aggregate_yes_ratio 32 | higher_is_better: true 33 | metadata: 34 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/docvqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import logging 4 | 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | logger = logging.getLogger("lmms-eval") 8 | 9 | 10 | def docvqa_doc_to_visual(doc): 11 | return [doc["image"].convert("RGB")] 12 | 13 | 14 | def docvqa_doc_to_text(doc, model_specific_prompt_kwargs): 15 | question = doc["question"] 16 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 17 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 18 | return f"{pre_prompt}{question}{post_prompt}" 19 | 20 | 21 | def docvqa_test_process_results(doc, results): 22 
| pred = results[0] 23 | questionId = doc["questionId"] 24 | return {"anls": {"questionId": int(questionId), "answer": pred}, "submission": {"questionId": int(questionId), "answer": pred}} 25 | 26 | 27 | def docvqa_test_aggregate_results(results, args): 28 | # save results as json 29 | path = generate_submission_file("docvqa_test_for_submission.json", args) 30 | with open(path, "w") as f: 31 | json.dump(results, f) 32 | logger.info(f"Results saved to {path}") 33 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage1_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 1, 4 | "allgather_partitions": true, 5 | "allgather_bucket_size": 1e9, 6 | "overlap_comm": true, 7 | "reduce_scatter": true, 8 | "reduce_bucket_size": 1e9, 9 | "contiguous_gradients": true 10 | }, 11 | "fp16": { 12 | "enabled": "auto", 13 | "auto_cast": true, 14 | "loss_scale": 0, 15 | "initial_scale_power": 32, 16 | "loss_scale_window": 1000, 17 | "hysteresis": 2, 18 | "min_loss_scale": 1 19 | }, 20 | "bf16": { 21 | "enabled": "auto" 22 | }, 23 | "optimizer": { 24 | "type": "AdamW", 25 | "params": { 26 | "lr": "auto", 27 | "betas": [ 28 | 0.9, 29 | 0.999 30 | ], 31 | "eps": 1e-8, 32 | "weight_decay": "auto" 33 | } 34 | }, 35 | "gradient_accumulation_steps": "auto", 36 | "gradient_clipping": "auto", 37 | "steps_per_print": 2000, 38 | "train_batch_size": "auto", 39 | "train_micro_batch_size_per_gpu": "auto", 40 | "wall_clock_breakdown": true 41 | } 42 | -------------------------------------------------------------------------------- /Eagle2_5/deployment/setup_x86.dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tensorrt:25.06-py3 2 | 3 | RUN chmod 1777 /tmp 4 | # RUN apt update && apt install -y libturbojpeg libsm6 libxext6 -y 5 | # RUN apt install libgl1-mesa-glx libsm6 libxext6 -y 6 | 7 | # RUN pip install setuptools==65.5.1 debugpy einops tqdm numpy pandas 8 | # RUN pip install llvmlite==0.41.0 9 | # RUN pip install numba==0.58.0 scikit-image==0.18.3 "matplotlib<3.6.0" 10 | 11 | RUN pip install onnx onnxsim onnxruntime onnx_graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com 12 | RUN pip install pycuda fvcore timm peft liger_kernel 13 | RUN pip install transformers==4.51.0 accelerate 14 | RUN pip install qwen-vl-utils[decord]==0.0.8 15 | RUN FLASH_ATTENTION_FORCE_BUILD=TRUE MAX_JOBS=16 pip install flash-attn 16 | RUN pip install dill==0.3.7 17 | RUN pip install mpi4py 18 | 19 | # Update cmake version from 3.24 to 3.27 20 | RUN apt update; \ 21 | apt install -y build-essential libssl-dev; \ 22 | wget https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6.tar.gz; \ 23 | tar xf cmake-3.27.6.tar.gz; \ 24 | cd cmake-3.27.6; \ 25 | ./bootstrap; \ 26 | make -j$(nproc); \ 27 | make install 28 | 29 | RUN apt install ninja-build 30 | -------------------------------------------------------------------------------- /Eagle2_5/deepspeed_configs/zero_stage3_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 3, 4 | "overlap_comm": true, 5 | "contiguous_gradients": true, 6 | "sub_group_size": 1e9, 7 | "reduce_bucket_size": 1e9, 8 | "stage3_prefetch_bucket_size": 1e9, 9 | "stage3_param_persistence_threshold": 1e7, 10 | "stage3_max_live_parameters": 1e9, 11 | "stage3_max_reuse_distance": 1e9, 12 | 
"stage3_gather_16bit_weights_on_model_save": true 13 | }, 14 | "fp16": { 15 | "enabled": "auto", 16 | "auto_cast": true, 17 | "loss_scale": 0, 18 | "initial_scale_power": 32, 19 | "loss_scale_window": 1000, 20 | "hysteresis": 2, 21 | "min_loss_scale": 1 22 | }, 23 | "bf16": { 24 | "enabled": "auto" 25 | }, 26 | "optimizer": { 27 | "type": "AdamW", 28 | "params": { 29 | "lr": "auto", 30 | "betas": [ 31 | 0.9, 32 | 0.999 33 | ], 34 | "eps": 1e-8, 35 | "weight_decay": "auto" 36 | } 37 | }, 38 | "gradient_accumulation_steps": "auto", 39 | "gradient_clipping": "auto", 40 | "steps_per_print": 2000, 41 | "train_batch_size": "auto", 42 | "train_micro_batch_size_per_gpu": "auto", 43 | "wall_clock_breakdown": true 44 | } 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/mme/mme.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/MME 2 | dataset_kwargs: 3 | token: True 4 | task: "mme" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.mme_doc_to_visual 8 | doc_to_text: !function utils.mme_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 16 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | # The return value of process_results will be used by metrics 17 | process_results: !function utils.mme_process_results 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: mme_percetion_score 21 | aggregation: !function utils.mme_aggregate_results 22 | higher_is_better: true 23 | - metric: mme_cognition_score 24 | aggregation: !function utils.mme_aggregate_results 25 | higher_is_better: true 26 | model_specific_prompt_kwargs: 27 | default: 28 | pre_prompt: "" 29 | post_prompt: "\nAnswer the question using a single word or phrase." 
30 | qwen_vl: 31 | pre_prompt: "" 32 | post_prompt: " Answer:" 33 | otterhd: 34 | pre_prompt: "" 35 | post_prompt: " Answer:" 36 | metadata: 37 | - version: 0.0 38 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/llava-bench-coco 2 | dataset_kwargs: 3 | token: True 4 | task: "llava_bench_coco" 5 | test_split: train 6 | output_type: generate_until 7 | doc_to_visual: !function utils.llava_doc_to_visual 8 | doc_to_text: !function utils.llava_doc_to_text 9 | doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | process_results: !function utils.llava_process_results 19 | metric_list: 20 | - metric: gpt_eval_llava_all 21 | aggregation: !function utils.llava_all_aggregation 22 | higher_is_better: true 23 | - metric: gpt_eval_llava_conv 24 | aggregation: !function utils.llava_conv_aggregation 25 | higher_is_better: true 26 | - metric: gpt_eval_llava_detail 27 | aggregation: !function utils.llava_detail_aggregation 28 | higher_is_better: true 29 | - metric: gpt_eval_llava_complex 30 | aggregation: !function utils.llava_complex_aggregation 31 | higher_is_better: true 32 | metadata: 33 | version: 0.0 34 | gpt_eval_model_name: "gpt-4-0314" 35 | model_specific_prompt_kwargs: 36 | default: 37 | pre_prompt: "" 38 | post_prompt: "" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ferret/ferret.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/Ferret-Bench 2 | dataset_kwargs: 3 | token: True 4 | task: "ferret" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.ferret_doc_to_visual 8 | doc_to_text: !function utils.ferret_doc_to_text 9 | doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.ferret_process_results 20 | metric_list: 21 | - metric: gpt_eval_ferret_all 22 | aggregation: !function utils.ferret_all_aggregation 23 | higher_is_better: true 24 | - metric: gpt_eval_ferret_refer_desc 25 | aggregation: !function utils.ferret_refer_desc_aggregation 26 | higher_is_better: true 27 | - metric: gpt_eval_ferret_refer_reason 28 | aggregation: !function utils.ferret_refer_reason_aggregation 29 | higher_is_better: true 30 | - metric: gpt_eval_ferret_ground_conv 31 | aggregation: !function utils.ferret_ground_conv_aggregation 32 | higher_is_better: true 33 | metadata: 34 | version: 0.0 35 | gpt_eval_model_name: "gpt-4-0314" 36 | model_specific_prompt_kwargs: 37 | default: 38 | pre_prompt: "" 39 | post_prompt: "" -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/llava-bench-in-the-wild 2 | dataset_kwargs: 3 | token: True 4 | task: "llava_in_the_wild" 5 | test_split: train 6 | output_type: generate_until 7 | doc_to_visual: !function utils.llava_doc_to_visual 8 | doc_to_text: !function utils.llava_doc_to_text 9 | 
doc_to_target: "gpt_answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | image_aspect_ratio: original 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.llava_process_results 20 | metric_list: 21 | - metric: gpt_eval_llava_all 22 | aggregation: !function utils.llava_all_aggregation 23 | higher_is_better: true 24 | - metric: gpt_eval_llava_conv 25 | aggregation: !function utils.llava_conv_aggregation 26 | higher_is_better: true 27 | - metric: gpt_eval_llava_detail 28 | aggregation: !function utils.llava_detail_aggregation 29 | higher_is_better: true 30 | - metric: gpt_eval_llava_complex 31 | aggregation: !function utils.llava_complex_aggregation 32 | higher_is_better: true 33 | metadata: 34 | version: 0.0 35 | gpt_eval_model_name: "gpt-4-0613" 36 | model_specific_prompt_kwargs: 37 | default: 38 | pre_prompt: "" 39 | post_prompt: "" 40 | -------------------------------------------------------------------------------- /Eagle2_5/document/0.onboarding.md: -------------------------------------------------------------------------------- 1 | # 🦅 Onboarding Eagle 2.5 2 | 3 | This guide provides step-by-step instructions from the environment setup to the data preparation, training, and inference of the Eagle VLM. 4 | 5 | --- 6 | 7 | ### 📋 Main Steps 8 | 9 | ``` 10 | # Clone the repository 11 | git clone -b main --single-branch https://github.com/NVlabs/Eagle.git 12 | 13 | # Navigate to the working directory (Eagle 2.5) 14 | cd Eagle/Eagle2_5 15 | 16 | ``` 17 | > ⚠️ Note: All commands below should be executed within the `Eagle/Eagle2_5` directory. 18 | 19 | 20 | 1. 🦅 [Installing Eagle](./1.installing.md) 21 | Install Eagle and all necessary dependencies. 22 | 23 | 2. 📊 [Preparing Data](./2.preparing_playground.md) 24 | Prepare your dataset in the required format. 25 | 26 | 3. 💪 [Starting Training](./3.training.md) 27 | Train the Eagle model using your prepared data. 28 | 29 | 4. ✨ [Launching Streamlit Demo](./4.streamlit_demo.md) 30 | Run an interactive Streamlit demo to visualize results. 31 | 32 | 5. 🔮 [Model Inference](./5.inference.md) 33 | Perform inference using the trained model. 
34 | 35 | --- 36 | 37 | ### 📎 Others 38 | 39 | - 📖 [Explain Script Arguments](./explain_script_arguments.md) 40 | - 📖 [How to Use Lmdb](./how_to_use_lmdb_to_read_images.md) 41 | - 📖 [TensorRT-LLM Deployment](../deployment/README.md) -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCO 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOplus 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCO 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function 
utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOg 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcocog/_default_template_seg_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOg 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_seg_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 
23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/refcoco+/_default_template_bbox_yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/RefCOCOplus 2 | output_type: generate_until 3 | doc_to_visual: !function utils.refcoco_bbox_doc_to_visual 4 | doc_to_text: !function utils.refcoco_doc_to_text 5 | doc_to_target: "answer" 6 | generation_kwargs: 7 | until: 8 | - "ASSISTANT:" 9 | process_results: !function utils.refcoco_process_result 10 | metric_list: 11 | - metric: refcoco_Bleu_4 12 | aggregation : !function utils.refcoco_bleu4 13 | higher_is_better : true 14 | - metric: refcoco_Bleu_3 15 | aggregation : !function utils.refcoco_bleu3 16 | higher_is_better : true 17 | - metric: refcoco_Bleu_2 18 | aggregation : !function utils.refcoco_bleu2 19 | higher_is_better : true 20 | - metric: refcoco_Bleu_1 21 | aggregation : !function utils.refcoco_bleu1 22 | higher_is_better : true 23 | - metric: refcoco_METEOR 24 | aggregation : !function utils.refcoco_meteor 25 | higher_is_better : true 26 | - metric: refcoco_ROUGE_L 27 | aggregation : !function utils.refcoco_rougel 28 | higher_is_better : true 29 | - metric: refcoco_CIDEr 30 | aggregation : !function utils.refcoco_cider 31 | higher_is_better : true 32 | #- metric: refcoco_SPICE 33 | # aggregation : !function utils.refcoco_spice 34 | # higher_is_better : true 35 | metadata: 36 | version: '0.0' -------------------------------------------------------------------------------- /Eagle/scripts/eval/vqav2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | gpu_list="${CUDA_VISIBLE_DEVICES:-0}" 6 | IFS=',' read -ra GPULIST <<< "$gpu_list" 7 | 8 | CHUNKS=${#GPULIST[@]} 9 | 10 | SPLIT="llava_vqav2_mscoco_test-dev2015" 11 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/vqav2" 12 | 13 | for IDX in $(seq 0 $((CHUNKS-1))); do 14 | CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m eagle.eval.model_vqa_loader \ 15 | --model-path $CKPT \ 16 | --question-file ./playground/data/eval/vqav2/$SPLIT.jsonl \ 17 | --image-folder ./playground/data/eval/vqav2/test2015 \ 18 | --answers-file ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl \ 19 | --num-chunks $CHUNKS \ 20 | --chunk-idx $IDX \ 21 | --temperature 0 \ 22 | --conv-mode vicuna_v1 & 23 | done 24 | 25 | wait 26 | 27 | output_file=${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl 28 | 29 | # Clear out the output file if it exists. 30 | > "$output_file" 31 | 32 | # Loop through the indices and concatenate each file. 
33 | for IDX in $(seq 0 $((CHUNKS-1))); do 34 | cat ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl >> "$output_file" 35 | done 36 | 37 | python scripts/convert_vqav2_for_submission.py --src ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl --save_path ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/vqav2-upload-$NAME.json --split $SPLIT --ckpt $NAME -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ai2d/utils.py: -------------------------------------------------------------------------------- 1 | def ai2d_doc_to_text(doc, model_specific_prompt_kwargs=None): 2 | question, choices = doc["question"], doc["options"] 3 | len_choices = len(choices) 4 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 5 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 6 | if model_specific_prompt_kwargs["prompt_format"] == "mcq": 7 | options = [chr(ord("A") + i) for i in range(len_choices)] 8 | choices_str = "\n".join([f"{option}. {choice}" for option, choice in zip(options, choices)]) 9 | return f"{pre_prompt}{question}\n{choices_str}{post_prompt}" 10 | elif model_specific_prompt_kwargs["prompt_format"] == "qa": 11 | options = "\n".join(choices) 12 | return f"{pre_prompt}{question}{options}{post_prompt}" 13 | else: 14 | raise ValueError(f"Unknown prompt format: {model_specific_prompt_kwargs['prompt_format']}") 15 | 16 | 17 | def ai2d_doc_to_visual(doc): 18 | return [doc["image"].convert("RGB")] 19 | 20 | 21 | def ai2d_doc_to_target(doc, model_specific_target_kwargs): 22 | if model_specific_target_kwargs == "mcq": 23 | len_choices = len(doc["options"]) 24 | options = [chr(ord("A") + i) for i in range(len_choices)] 25 | return options[int(doc["answer"])] 26 | elif model_specific_target_kwargs == "qa": 27 | return doc["options"][int(doc["answer"])] 28 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/hallusion_bench/hallusion_bench_image.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/HallusionBench 2 | dataset_kwargs: 3 | token: True 4 | task: "hallusion_bench_image" 5 | test_split: image 6 | output_type: generate_until 7 | doc_to_visual: !function evaluate_hb.hb_doc_to_visual 8 | doc_to_text: !function evaluate_hb.hb_doc_to_text 9 | doc_to_target: "gt_answer_details" 10 | process_results: !function evaluate_hb.hb_process_results 11 | model_specific_prompt_kwargs: 12 | default: 13 | pre_prompt: "" 14 | post_prompt: "" 15 | generation_kwargs: 16 | max_new_tokens: 128 17 | temperature: 0 18 | top_p: 0 19 | num_beams: 1 20 | do_sample: false 21 | metric_list: 22 | - metric: aAcc 23 | aggregation: !function evaluate_hb.hb_aggregation_result_aAcc 24 | higher_is_better: true 25 | - metric: qAcc 26 | aggregation: !function evaluate_hb.hb_aggregation_result_qAcc 27 | higher_is_better: true 28 | - metric: fAcc 29 | aggregation: !function evaluate_hb.hb_aggregation_result_fAcc 30 | higher_is_better: true 31 | # - metric: aAcc 32 | # aggregation: !function evaluate_hb.hb_aggregation_result_aAcc_intern 33 | # higher_is_better: true 34 | # - metric: qAcc 35 | # aggregation: !function evaluate_hb.hb_aggregation_result_qAcc_intern 36 | # higher_is_better: true 37 | # - metric: fAcc 38 | # aggregation: !function evaluate_hb.hb_aggregation_result_fAcc_intern 39 | # higher_is_better: true 40 | metadata: 41 | - version: 0.0 42 | -------------------------------------------------------------------------------- 
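The task YAML files in this directory all follow the lmms_eval contract spelled out in the comments of mme.yaml and pope.yaml: `process_results` returns a dict keyed by metric name (one entry per item in `metric_list`), and each `aggregation` function reduces the collected per-sample values into the reported score. A minimal, hypothetical pair of functions following that contract (the task and metric names here are illustrative and are not files in this repository) might look like:

```python
# Hypothetical utils.py for an lmms_eval-style task; "mytask_accuracy" is an
# illustrative metric name that would appear under metric_list in the YAML.
def mytask_process_results(doc, results):
    pred = results[0].strip().lower()
    target = doc["answer"].strip().lower()
    # One key per metric declared in the task YAML's metric_list.
    return {"mytask_accuracy": 1.0 if pred == target else 0.0}


def mytask_aggregate_accuracy(scores):
    # Receives the list of per-sample values returned above for this metric.
    return sum(scores) / len(scores) if scores else 0.0
```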
/Eagle2_5/document/how_to_use_lmdb_to_read_images.md: -------------------------------------------------------------------------------- 1 | ## original 2 | 3 | ```json 4 | { 5 | "conversations": [ 6 | {"from": "human", "value": " what is this?"}, 7 | {"from": "gpt", "value": "It is an apple."} 8 | ], 9 | "image": "path/to/image.jpg" 10 | } 11 | ``` 12 | 13 | ```python 14 | from PIL import Image 15 | pil_image = Image.open("path/to/image.jpg") 16 | ``` 17 | 18 | 19 | ### lmdb 20 | 21 | ```json 22 | # sample 23 | { 24 | "conversations": [ 25 | {"from": "human", "value": " what is this?"}, 26 | {"from": "gpt", "value": "It is an apple."} 27 | ], 28 | "image": { 29 | "lmdb_file": "path/to/lmdb/file", 30 | "lmdb_key": "image_key" 31 | } 32 | } 33 | ``` 34 | 35 | ```python 36 | import lmdb 37 | import cv2 38 | from PIL import Image 39 | import io 40 | import numpy as np 41 | 42 | image_meta = sample["image"] 43 | lmdb_file = image_meta["lmdb_file"] 44 | lmdb_key = image_meta["lmdb_key"] 45 | env = lmdb.open(lmdb_file, readonly=True, lock=False) 46 | with env.begin(write=False) as txn: 47 | image_bin = txn.get(lmdb_key.encode('ascii')) 48 | buf = io.BytesIO(image_bin) 49 | try: 50 | pil_image = Image.open(buf) 51 | except Exception as e: 52 | image_np = np.frombuffer(image_bin, dtype=np.uint8) 53 | image_bgr = cv2.imdecode(image_np, cv2.IMREAD_COLOR) 54 | image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB) 55 | pil_image = Image.fromarray(image_rgb) 56 | ``` 57 | -------------------------------------------------------------------------------- /Eagle/scripts/zero3_offload.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | "optimizer": { 14 | "type": "AdamW", 15 | "params": { 16 | "lr": "auto", 17 | "betas": "auto", 18 | "eps": "auto", 19 | "weight_decay": "auto" 20 | } 21 | }, 22 | "scheduler": { 23 | "type": "WarmupLR", 24 | "params": { 25 | "warmup_min_lr": "auto", 26 | "warmup_max_lr": "auto", 27 | "warmup_num_steps": "auto" 28 | } 29 | }, 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "gather_16bit_weights_on_model_save": true 49 | }, 50 | "gradient_accumulation_steps": "auto", 51 | "gradient_clipping": "auto", 52 | "train_batch_size": "auto", 53 | "train_micro_batch_size_per_gpu": "auto", 54 | "steps_per_print": 1e5, 55 | "wall_clock_breakdown": false 56 | } -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2017_cap_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption2017 2 | dataset_kwargs: 3 | token: True 4 | task: "coco2017_cap_val" 5 | group : "coco_caption2017" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: !function utils.coco_doc_to_text 10 | 
doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_Bleu_4 21 | aggregation : !function utils.coco_bleu4 22 | higher_is_better : true 23 | - metric: coco_Bleu_3 24 | aggregation : !function utils.coco_bleu3 25 | higher_is_better : true 26 | - metric: coco_Bleu_2 27 | aggregation : !function utils.coco_bleu2 28 | higher_is_better : true 29 | - metric: coco_Bleu_1 30 | aggregation : !function utils.coco_bleu1 31 | higher_is_better : true 32 | - metric: coco_METEOR 33 | aggregation : !function utils.coco_meteor 34 | higher_is_better : true 35 | - metric: coco_ROUGE_L 36 | aggregation : !function utils.coco_rougel 37 | higher_is_better : true 38 | - metric: coco_CIDEr 39 | aggregation : !function utils.coco_cider 40 | higher_is_better : true 41 | #- metric: coco_SPICE 42 | # aggregation : !function utils.coco_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/flickr30k/flickr30k_test.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/flickr30k 2 | dataset_kwargs: 3 | token: True 4 | task : "flickr30k_test" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.flickr_doc_to_visual 8 | doc_to_text: !function utils.flickr_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | max_new_tokens: 64 12 | temperature: 0 13 | top_p: 0 14 | num_beams: 1 15 | do_sample: false 16 | process_results: !function utils.flickr_process_result 17 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 18 | metric_list: 19 | - metric: flickr_Bleu_4 20 | aggregation : !function utils.flickr_bleu4 21 | higher_is_better : true 22 | - metric: flickr_Bleu_3 23 | aggregation : !function utils.flickr_bleu3 24 | higher_is_better : true 25 | - metric: flickr_Bleu_2 26 | aggregation : !function utils.flickr_bleu2 27 | higher_is_better : true 28 | - metric: flickr_Bleu_1 29 | aggregation : !function utils.flickr_bleu1 30 | higher_is_better : true 31 | - metric: flickr_METEOR 32 | aggregation : !function utils.flickr_meteor 33 | higher_is_better : true 34 | - metric: flickr_ROUGE_L 35 | aggregation : !function utils.flickr_rougel 36 | higher_is_better : true 37 | - metric: flickr_CIDEr 38 | aggregation : !function utils.flickr_cider 39 | higher_is_better : true 40 | #- metric: flickr_SPICE 41 | # aggregation : !function utils.flickr_spice 42 | # higher_is_better : true 43 | metadata: 44 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/patch/fused_monkey_patch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | from .fused_ops.fused_rms_norm import LigerRMSNorm 18 | from .fused_ops.fused_rotary_pos_emb import liger_rotary_pos_emb 19 | from .fused_ops.fused_swiglu import LigerSwiGLUMLP 20 | 21 | 22 | def replace_liger_fused_ops(): 23 | from transformers.models.qwen2 import modeling_qwen2 24 | modeling_qwen2.Qwen2MLP = LigerSwiGLUMLP 25 | modeling_qwen2.Qwen2RMSNorm = LigerRMSNorm 26 | modeling_qwen2.apply_rotary_pos_emb = liger_rotary_pos_emb 27 | 28 | from transformers.models.llama import modeling_llama 29 | modeling_llama.LlamaMLP = LigerSwiGLUMLP 30 | modeling_llama.LlamaRMSNorm = LigerRMSNorm 31 | 32 | from transformers.models.qwen3 import modeling_qwen3 33 | modeling_qwen3.Qwen3MLP = LigerSwiGLUMLP 34 | modeling_qwen3.Qwen3RMSNorm = LigerRMSNorm 35 | modeling_qwen3.apply_rotary_pos_emb = liger_rotary_pos_emb 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/coco_cap/coco2014_cap_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/COCO-Caption 2 | dataset_kwargs: 3 | token: True 4 | task: "coco2014_cap_val" 5 | group : "coco_caption" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.coco_doc_to_visual 9 | doc_to_text: "Provide a one-sentence caption for the provided image." 
10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.coco_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: coco_Bleu_4 21 | aggregation : !function utils.coco_bleu4 22 | higher_is_better : true 23 | - metric: coco_Bleu_3 24 | aggregation : !function utils.coco_bleu3 25 | higher_is_better : true 26 | - metric: coco_Bleu_2 27 | aggregation : !function utils.coco_bleu2 28 | higher_is_better : true 29 | - metric: coco_Bleu_1 30 | aggregation : !function utils.coco_bleu1 31 | higher_is_better : true 32 | - metric: coco_METEOR 33 | aggregation : !function utils.coco_meteor 34 | higher_is_better : true 35 | - metric: coco_ROUGE_L 36 | aggregation : !function utils.coco_rougel 37 | higher_is_better : true 38 | - metric: coco_CIDEr 39 | aggregation : !function utils.coco_cider 40 | higher_is_better : true 41 | #- metric: coco_SPICE 42 | # aggregation : !function utils.coco_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 -------------------------------------------------------------------------------- /Eagle/scripts/eval/gqa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CKPT=$1 3 | NAME=$2 4 | 5 | gpu_list="${CUDA_VISIBLE_DEVICES:-0}" 6 | IFS=',' read -ra GPULIST <<< "$gpu_list" 7 | 8 | CHUNKS=${#GPULIST[@]} 9 | 10 | SPLIT="llava_gqa_testdev_balanced" 11 | GQADIR="./playground/data/eval/gqa/data" 12 | LOCAL_ANSWER_DIR="./playground/data/eval_local_files/gqa" 13 | 14 | for IDX in $(seq 0 $((CHUNKS-1))); do 15 | CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m eagle.eval.model_vqa_loader \ 16 | --model-path $CKPT \ 17 | --question-file ./playground/data/eval/gqa/$SPLIT.jsonl \ 18 | --image-folder ./playground/data/eval/gqa/data/images \ 19 | --answers-file ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl \ 20 | --num-chunks $CHUNKS \ 21 | --chunk-idx $IDX \ 22 | --temperature 0 \ 23 | --conv-mode vicuna_v1 & 24 | done 25 | 26 | wait 27 | 28 | output_file=${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/merge.jsonl 29 | 30 | # Clear out the output file if it exists. 31 | > "$output_file" 32 | 33 | # Loop through the indices and concatenate each file. 
34 | for IDX in $(seq 0 $((CHUNKS-1))); do 35 | cat ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/${CHUNKS}_${IDX}.jsonl >> "$output_file" 36 | done 37 | 38 | python scripts/convert_gqa_for_eval.py --src $output_file --dst ${LOCAL_ANSWER_DIR}/$SPLIT/$NAME/testdev_balanced_predictions.json 39 | absolute_path=$(readlink -f "${LOCAL_ANSWER_DIR}/$SPLIT/$NAME") 40 | 41 | cd $GQADIR 42 | # python eval/eval.py --predictions ${LOCAL_ANSWER_DIR}/$SPLIT/$name/{tier}_predictions.json --tier testdev_balanced 43 | python eval.py --predictions ${absolute_path}/{tier}_predictions.json --tier testdev_balanced -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/nocaps/nocaps_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/NoCaps 2 | dataset_kwargs: 3 | token: True 4 | task: "nocaps_val" 5 | group : "nocaps_caption" 6 | test_split: validation 7 | output_type: generate_until 8 | doc_to_visual: !function utils.nocaps_doc_to_visual 9 | doc_to_text: !function utils.nocaps_doc_to_text 10 | doc_to_target: "annotations_captions" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.nocaps_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: nocaps_Bleu_4 21 | aggregation : !function utils.nocaps_bleu4 22 | higher_is_better : true 23 | - metric: nocaps_Bleu_3 24 | aggregation : !function utils.nocaps_bleu3 25 | higher_is_better : true 26 | - metric: nocaps_Bleu_2 27 | aggregation : !function utils.nocaps_bleu2 28 | higher_is_better : true 29 | - metric: nocaps_Bleu_1 30 | aggregation : !function utils.nocaps_bleu1 31 | higher_is_better : true 32 | - metric: nocaps_METEOR 33 | aggregation : !function utils.nocaps_meteor 34 | higher_is_better : true 35 | - metric: nocaps_ROUGE_L 36 | aggregation : !function utils.nocaps_rougel 37 | higher_is_better : true 38 | - metric: nocaps_CIDEr 39 | aggregation : !function utils.nocaps_cider 40 | higher_is_better : true 41 | #- metric: nocaps_SPICE 42 | # aggregation : !function utils.nocaps_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 46 | include: _default_template_nocaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/__init__.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import FilterEnsemble 2 | from . import selection 3 | from . import extraction 4 | from . import transformation 5 | 6 | 7 | FILTER_REGISTRY = { 8 | "take_first": selection.TakeFirstFilter, 9 | "regex": extraction.RegexFilter, 10 | "majority_vote": selection.MajorityVoteFilter, 11 | "take_first_k": selection.TakeKFilter, 12 | "remove_whitespace": extraction.WhitespaceFilter, 13 | "lowercase": transformation.LowercaseFilter, 14 | "uppercase": transformation.UppercaseFilter, 15 | "map": transformation.MapFilter, 16 | # TODO: implement this filter. either it should take in an arbitrary "scoring"/reward function 17 | # that takes an input and returns a scalar and then should select the max reward, 18 | # or should implement different filters for different ways of handling a reward model's inference. 
19 | # "arg_max": selection.ArgMaxFilter, 20 | } 21 | 22 | 23 | def get_filter(filter_name): 24 | if filter_name in FILTER_REGISTRY: 25 | return FILTER_REGISTRY[filter_name] 26 | else: 27 | return filter_name 28 | 29 | 30 | def build_filter_ensemble(filter_name, components): 31 | """ 32 | Create a filtering pipeline. 33 | """ 34 | filters = [] 35 | for function, kwargs in components: 36 | if kwargs is None: 37 | f = get_filter(function)() 38 | else: 39 | # create a filter given its name in the registry 40 | f = get_filter(function)(**kwargs) # TODO: pass kwargs to filters properly 41 | # add the filter as a pipeline step 42 | filters.append(f) 43 | 44 | return FilterEnsemble(name=filter_name, filters=filters) 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_val.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task: "textcaps_val" 5 | group : "textcaps_caption" 6 | test_split: val 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | max_new_tokens: 64 13 | temperature: 0 14 | top_p: 0 15 | num_beams: 1 16 | do_sample: false 17 | process_results: !function utils.textcaps_process_result 18 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 19 | metric_list: 20 | - metric: textcaps_Bleu_4 21 | aggregation : !function utils.textcaps_bleu4 22 | higher_is_better : true 23 | - metric: textcaps_Bleu_3 24 | aggregation : !function utils.textcaps_bleu3 25 | higher_is_better : true 26 | - metric: textcaps_Bleu_2 27 | aggregation : !function utils.textcaps_bleu2 28 | higher_is_better : true 29 | - metric: textcaps_Bleu_1 30 | aggregation : !function utils.textcaps_bleu1 31 | higher_is_better : true 32 | - metric: textcaps_METEOR 33 | aggregation : !function utils.textcaps_meteor 34 | higher_is_better : true 35 | - metric: textcaps_ROUGE_L 36 | aggregation : !function utils.textcaps_rougel 37 | higher_is_better : true 38 | - metric: textcaps_CIDEr 39 | aggregation : !function utils.textcaps_cider 40 | higher_is_better : true 41 | #- metric: textcaps_SPICE 42 | # aggregation : !function utils.textcaps_spice 43 | # higher_is_better : true 44 | metadata: 45 | - version: 0.0 46 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/transformation.py: -------------------------------------------------------------------------------- 1 | from lmms_eval.api.filter import Filter 2 | 3 | 4 | class LowercaseFilter(Filter): 5 | def __init__(self) -> None: 6 | pass 7 | 8 | def apply(self, resps, docs): 9 | def filter_set(inst): 10 | return [resp.lower() for resp in inst] 11 | 12 | return [filter_set(resp) for resp in resps] 13 | 14 | 15 | class UppercaseFilter(Filter): 16 | def __init__(self) -> None: 17 | pass 18 | 19 | def apply(self, resps, docs): 20 | def filter_set(inst): 21 | return [resp.upper() for resp in inst] 22 | 23 | return [filter_set(resp) for resp in resps] 24 | 25 | 26 | class MapFilter(Filter): 27 | def __init__(self, mapping_dict: dict = {}, default_value=None) -> None: 28 | """ 29 | Initializes the MapFilter with a given mapping dictionary and default value. 
30 | 31 | Args: 32 | - mapping_dict (dict): A dictionary containing the key-value mappings. 33 | Default is an empty dictionary. 34 | - default_value (Any): The value to be returned when a key is not found in the mapping_dict. 35 | Default is None. 36 | 37 | Example: 38 | mapper = MapFilter({'A': 1, 'B': 2}, default_value=0) 39 | """ 40 | assert isinstance(mapping_dict, dict), "Provided mapping_dict is not a dictionary" 41 | self.mapping_dict = mapping_dict 42 | self.default_value = default_value 43 | 44 | def apply(self, resps, docs): 45 | def filter_set(inst): 46 | return [self.mapping_dict.get(resp, self.default_value) for resp in inst] 47 | 48 | return [filter_set(resp) for resp in resps] 49 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_vizwiz_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | 5 | # for debug 6 | import sys 7 | sys.path.append(os.getcwd()) 8 | 9 | from eagle.eval.m4c_evaluator import EvalAIAnswerProcessor 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--annotation-file', type=str, required=True) 15 | parser.add_argument('--result-file', type=str, required=True) 16 | parser.add_argument('--result-upload-file', type=str, required=True) 17 | return parser.parse_args() 18 | 19 | 20 | if __name__ == '__main__': 21 | 22 | args = parse_args() 23 | 24 | os.makedirs(os.path.dirname(args.result_upload_file), exist_ok=True) 25 | 26 | results = [] 27 | error_line = 0 28 | for line_idx, line in enumerate(open(args.result_file)): 29 | try: 30 | results.append(json.loads(line)) 31 | except: 32 | error_line += 1 33 | results = {x['question_id']: x['text'] for x in results} 34 | test_split = [json.loads(line) for line in open(args.annotation_file)] 35 | split_ids = set([x['question_id'] for x in test_split]) 36 | 37 | print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') 38 | 39 | all_answers = [] 40 | 41 | answer_processor = EvalAIAnswerProcessor() 42 | 43 | for x in test_split: 44 | assert x['question_id'] in results 45 | all_answers.append({ 46 | 'image': x['image'], 47 | 'answer': answer_processor(results[x['question_id']]) 48 | }) 49 | 50 | with open(args.result_upload_file, 'w') as f: 51 | json.dump(all_answers, f) 52 | 53 | print(f"successfully saving results to {args.result_upload_file}") -------------------------------------------------------------------------------- /Eagle2_5/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "eagle_vl" 7 | version = "2.5" 8 | description = "Eagle 2.5: Boosting Long-Context Post-Training for Frontier Vision-Language Models" 9 | readme = "README.md" 10 | requires-python = ">=3.8" 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: Apache Software License", 14 | ] 15 | 16 | dependencies = ["transformers==4.51.0", "tokenizers==0.21.1", "sentencepiece==0.2.0", "shortuuid", 17 | "accelerate==1.5.2", "peft==0.12.0", "bitsandbytes", "wandb==0.17.7", 18 | "pydantic==2.10.6", "markdown2[all]", "numpy==1.26.4", "scikit-learn>=1.2.2", 19 | "gradio", "gradio_client", "lmdb", 20 | "requests", "httpx==0.28.1", "uvicorn", "fastapi", "streamlit", "streamlit-image-select", 21 | "einops", 
"einops-exts", "timm==1.0.11", "deepspeed==0.16.5", "av", "liger_kernel==0.3.1", "filetype", "bitstring", 22 | "ebmlite", "triton==3.1.0", "scipy>=1.10.0", "datasets==2.21.0", "dotenv", "decord", "scikit-image", "imagehash", "swanlab", 23 | "torchcodec" 24 | ] 25 | 26 | 27 | 28 | [project.urls] 29 | "Homepage" = "https://nvlabs.github.io/EAGLE/" 30 | "Bug Tracker" = "https://github.com/NVlabs/EAGLE/issues" 31 | 32 | [tool.setuptools.packages.find] 33 | exclude = ["tools", "data*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "shell*", "work_dirs*", "pretrained*", "vid_playground*", "wandb*", "vlmeval*", "internel*", "streamlit*"] 34 | 35 | [tool.wheel] 36 | exclude = ["tools", "data*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "shell*", "work_dirs*", "pretrained*", "vid_playground*", "wandb*", "vlmeval*", "internel*", "streamlit*"] 37 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textcaps/textcaps_train.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/TextCaps 2 | dataset_kwargs: 3 | token: True 4 | task : "textcaps_train" 5 | group : "textcaps_caption" 6 | test_split: train 7 | output_type: generate_until 8 | doc_to_visual: !function utils.textcaps_doc_to_visual 9 | doc_to_text: !function utils.textcaps_doc_to_text 10 | doc_to_target: "answer" 11 | generation_kwargs: 12 | until: 13 | - "ASSISTANT:" 14 | max_new_tokens: 1024 15 | temperature: 0 16 | top_p: 0 17 | num_beams: 1 18 | do_sample: false 19 | process_results: !function utils.textcaps_process_result 20 | # Note that the metric name can be either a registed metric function (such as the case for GQA) or a key name returned by process_results 21 | metric_list: 22 | - metric: textcaps_Bleu_4 23 | aggregation : !function utils.textcaps_bleu4 24 | higher_is_better : true 25 | - metric: textcaps_Bleu_3 26 | aggregation : !function utils.textcaps_bleu3 27 | higher_is_better : true 28 | - metric: textcaps_Bleu_2 29 | aggregation : !function utils.textcaps_bleu2 30 | higher_is_better : true 31 | - metric: textcaps_Bleu_1 32 | aggregation : !function utils.textcaps_bleu1 33 | higher_is_better : true 34 | - metric: textcaps_METEOR 35 | aggregation : !function utils.textcaps_meteor 36 | higher_is_better : true 37 | - metric: textcaps_ROUGE_L 38 | aggregation : !function utils.textcaps_rougel 39 | higher_is_better : true 40 | - metric: textcaps_CIDEr 41 | aggregation : !function utils.textcaps_cider 42 | higher_is_better : true 43 | #- metric: textcaps_SPICE 44 | # aggregation : !function utils.textcaps_spice 45 | # higher_is_better : true 46 | metadata: 47 | - version: 0.0 48 | include: _default_template_textcaps_yaml -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/selection.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | 3 | from lmms_eval.api.filter import Filter 4 | 5 | 6 | class TakeFirstFilter(Filter): 7 | def __init__(self) -> None: 8 | """ 9 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 10 | """ 11 | 12 | def apply(self, resps, docs): 13 | """ 14 | Assuming each entry of `resps` is a list of model responses, we discard all but the first response. 
15 | """ 16 | return map(lambda r: r[0], resps) 17 | 18 | 19 | class TakeKFilter(Filter): 20 | def __init__(self, *args, **kwargs) -> None: 21 | self.k = kwargs.pop("k") 22 | 23 | super().__init__(*args, **kwargs) 24 | 25 | def apply(self, resps, docs): 26 | # check we have at least k responses per doc, else we can't take the first k 27 | assert len(resps[0]) >= self.k, f"Need at least {self.k} responses per doc to take first {self.k}, but got {len(resps[0])} only! Please increase TaskConfig.repeats ." 28 | return map(lambda r: r[: self.k], resps) 29 | 30 | 31 | class MajorityVoteFilter(Filter): 32 | def __init__(self) -> None: 33 | """ 34 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 35 | """ 36 | 37 | def apply(self, resps, docs): 38 | """ 39 | Each entry of `resps` is a list of model responses. 40 | We select the response that occurs most frequently in each entry of `resps`. 41 | """ 42 | 43 | def select_majority(resp): 44 | counts = Counter(resp) 45 | vote = counts.most_common(1)[0][0] 46 | return vote 47 | 48 | return map(lambda r: [select_majority(r)], resps) 49 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x4-vicuna-13b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x4-vicuna-7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-vicuna-7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain_eagle_x5_vicuna_7b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-vicuna-13b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-llama3-8b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # export WANDB_DISABLED="true" 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero2.json \ 19 | --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ 20 | --version plain \ 21 | --data_path $PATH_TO_PRETRAINING_DATA/blip_laion_cc_sbu_558k.json \ 22 | --image_folder $PATH_TO_PRETRAINING_DATA/images \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --tune_mm_mlp_adapter True \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --bf16 True \ 30 | --output_dir ./checkpoints/$NAME \ 31 | --num_train_epochs 1 \ 32 | --per_device_train_batch_size 8 \ 33 | --per_device_eval_batch_size 4 \ 34 | --gradient_accumulation_steps 1 \ 35 | --evaluation_strategy "no" \ 36 | --save_strategy "steps" \ 37 | --save_steps 24000 \ 38 | --save_total_limit 1 \ 39 | --learning_rate 1e-3 \ 40 | --weight_decay 0. 
\ 41 | --warmup_ratio 0.03 \ 42 | --lr_scheduler_type "cosine" \ 43 | --logging_steps 1 \ 44 | --tf32 True \ 45 | --model_max_length 2048 \ 46 | --gradient_checkpointing True \ 47 | --dataloader_num_workers 4 \ 48 | --lazy_preprocess True \ 49 | --report_to wandb \ 50 | --run_name ${NAME} 51 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x4-vicuna-13b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x4-vicuna-7b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-vicuna-7b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-7b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --mm_projector_type mlp2x_gelu \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-vicuna-13b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path lmsys/vicuna-13b-v1.5 \ 19 | --version v1 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 24 | --mm_projector_type mlp2x_gelu \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. \ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/scripts/pretrain-eagle-x5-yi34b-cambrian.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | # We use 128 GPUs to pretrain the Yi-34B model; the total batch size is 1024 (128 GPUs x per-device batch size 8).
4 | 5 | # export WANDB_DISABLED="true" 6 | export WANDB_PROJECT="eagle" 7 | export WANDB_RUN_ID=${NAME} 8 | export WANDB_RESUME="allow" 9 | 10 | echo "MASTER_ADDR=$MASTER_ADDR" 11 | n_node=$SLURM_JOB_NUM_NODES 12 | echo "number of nodes:" $n_node 13 | echo "node rank:" $SLURM_PROCID 14 | 15 | python -m torch.distributed.run \ 16 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 17 | --master_addr $MASTER_ADDR --master_port 25031 \ 18 | train_mem.py \ 19 | --deepspeed ./scripts/zero3.json \ 20 | --model_name_or_path NousResearch/Nous-Hermes-2-Yi-34B \ 21 | --version plain \ 22 | --data_path $PATH_TO_CAMBRIAN_PRETRAINING_DATA \ 23 | --image_folder $PATH_TO_CAMBRIAN_PRETRAINING_DATA \ 24 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 25 | --mm_projector_type mlp2x_gelu \ 26 | --tune_mm_mlp_adapter True \ 27 | --mm_vision_select_layer -2 \ 28 | --mm_use_im_start_end False \ 29 | --mm_use_im_patch_token False \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 8 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 24000 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-4 \ 41 | --weight_decay 0. \ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} -------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-llama3-8b-1.8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | export WANDB_PROJECT="eagle" 5 | export WANDB_RUN_ID=${NAME} 6 | export WANDB_RESUME="allow" 7 | 8 | echo "MASTER_ADDR=$MASTER_ADDR" 9 | n_node=$SLURM_JOB_NUM_NODES 10 | echo "number of nodes:" $n_node 11 | echo "node rank:" $SLURM_PROCID 12 | 13 | python -m torch.distributed.run \ 14 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 15 | --master_addr $MASTER_ADDR --master_port 25031 \ 16 | train_mem.py \ 17 | --deepspeed ./scripts/zero2.json \ 18 | --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \ 19 | --version llama3 \ 20 | --data_path $PATH_TO_SFT_DATA/eagle-sft-v1-1_8m.json \ 21 | --image_folder $PATH_TO_SFT_DATA/images \ 22 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 23 | --mm_projector_type mlp2x_gelu \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_vision_select_layer -2 \ 26 | --mm_use_im_start_end False \ 27 | --mm_use_im_patch_token False \ 28 | --image_aspect_ratio pad \ 29 | --group_by_modality_length True \ 30 | --bf16 True \ 31 | --output_dir ./checkpoints/$NAME \ 32 | --num_train_epochs 1 \ 33 | --per_device_train_batch_size 4 \ 34 | --per_device_eval_batch_size 4 \ 35 | --gradient_accumulation_steps 1 \ 36 | --evaluation_strategy "no" \ 37 | --save_strategy "steps" \ 38 | --save_steps 500 \ 39 | --save_total_limit 1 \ 40 | --learning_rate 2e-5 \ 41 | --weight_decay 0. 
\ 42 | --warmup_ratio 0.03 \ 43 | --lr_scheduler_type "cosine" \ 44 | --logging_steps 1 \ 45 | --tf32 True \ 46 | --model_max_length 2048 \ 47 | --gradient_checkpointing True \ 48 | --dataloader_num_workers 4 \ 49 | --lazy_preprocess True \ 50 | --report_to wandb \ 51 | --run_name ${NAME} 52 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/scienceqa/utils.py: -------------------------------------------------------------------------------- 1 | def sqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 2 | context, question, choices = doc["hint"], doc["question"], doc["choices"] 3 | len_choices = len(choices) 4 | options = [chr(ord("A") + i) for i in range(len_choices)] 5 | choices_str = "\n".join([f"{option}. {choice}" for option, choice in zip(options, choices)]) 6 | if model_specific_prompt_kwargs["format"] == "default": 7 | if context: 8 | context = f"Context: {context}\n" 9 | 10 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 11 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 12 | return f"{pre_prompt}{context}{question}\n{choices_str}{post_prompt}" 13 | elif model_specific_prompt_kwargs["format"] == "qwen_vl": 14 | prompt = "Context: {}\nQuestion: {}\nOptions: {}\nAnswer:" 15 | context = context if context else "N/A" 16 | prompt = prompt.format(context, question, choices_str) 17 | return prompt 18 | else: 19 | raise ValueError(f"Unknown prompt format: {model_specific_prompt_kwargs}") 20 | 21 | 22 | def sqa_doc_to_visual(doc): 23 | if doc["image"] is None: 24 | return [] 25 | return [doc["image"].convert("RGB")] 26 | 27 | 28 | def sqa_doc_to_target(doc): 29 | len_choices = len(doc["choices"]) 30 | options = [chr(ord("A") + i) for i in range(len_choices)] 31 | return options[doc["answer"]] 32 | 33 | 34 | def sqa_process_results(doc, results): 35 | # I know this is weird, but it's how llava parses it. 36 | target = sqa_doc_to_target(doc) 37 | pred = results[0] 38 | if pred == target: 39 | return {"exact_match": 1.0} 40 | # pattern: ^[A-Z]\.
.* 41 | if len(pred) >= 2 and pred[0].isupper() and pred[1] == ".": 42 | result = 1.0 if pred[0] == target else 0.0 43 | return {"exact_match": result} 44 | return {"exact_match": 0.0} 45 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench_2/seedbench_2.yaml: -------------------------------------------------------------------------------- 1 | dataset_path: lmms-lab/SEED-Bench-2 2 | dataset_kwargs: 3 | token: True 4 | task: "seedbench-2" 5 | test_split: test 6 | output_type: generate_until 7 | doc_to_visual: !function utils.seed_doc_to_visual 8 | doc_to_text: !function utils.seed_doc_to_text 9 | doc_to_target: "answer" 10 | generation_kwargs: 11 | until: 12 | - "ASSISTANT:" 13 | max_new_tokens: 16 14 | image_aspect_ratio: original 15 | # The return value of process_results will be used by metrics 16 | process_results: !function utils.seed_process_result 17 | # Note that the metric name can be either a registered metric function (such as the case for GQA) or a key name returned by process_results 18 | metric_list: 19 | - metric: seed_Video 20 | aggregation: !function utils.seed_aggregation_result 21 | higher_is_better: true 22 | - metric: seed_Multiple_Images 23 | aggregation: !function utils.seed_aggregation_result 24 | higher_is_better: true 25 | - metric: seed_Image_&_Text_Generation 26 | aggregation: !function utils.seed_aggregation_result 27 | higher_is_better: true 28 | - metric: seed_Single_Image 29 | aggregation: !function utils.seed_aggregation_result 30 | higher_is_better: true 31 | - metric: seed_Image_Generation 32 | aggregation: !function utils.seed_aggregation_result 33 | higher_is_better: true 34 | - metric: seed_Interleaved_Image 35 | aggregation: !function utils.seed_aggregation_result 36 | higher_is_better: true 37 | - metric: seed_all 38 | aggregation: !function utils.seed_aggregation_result 39 | higher_is_better: true 40 | metadata: 41 | - version: 0.0 42 | 43 | model_specific_prompt_kwargs: 44 | llava : 45 | img_token : <image> 46 | post_prompt : "Answer with the option's letter from the given choices directly." 47 | gpt4V : 48 | img_token : <image> 49 | post_prompt : "Answer with the option's letter from the given choices directly."
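The two comments in this YAML describe the contract between `process_results` and `metric_list`: `process_results` returns one dict per sample whose keys are metric names, and each `aggregation` function later receives the list of those per-sample values. A minimal runnable sketch of that contract, using made-up docs and responses rather than real SEED-Bench-2 data:

```python
# Minimal sketch of the process_results -> aggregation contract described above.
# The docs and responses here are made up; they are not real SEED-Bench-2 samples.

def toy_process_result(doc, result):
    # Return one dict per sample; each key must match a `metric` name in metric_list.
    pred = result[0].strip()
    if len(pred) > 1:
        pred = pred[0]
    return {"seed_all": {"pred": pred, "answer": doc["answer"], "question_id": doc["question_id"]}}

def toy_aggregation(results):
    # Receives the list of per-sample values stored under one metric key.
    return sum(r["pred"] == r["answer"] for r in results) / len(results)

samples = [
    ({"answer": "A", "question_id": 1}, ["A. a red cube"]),
    ({"answer": "C", "question_id": 2}, ["B"]),
]
per_sample = [toy_process_result(doc, res)["seed_all"] for doc, res in samples]
print(toy_aggregation(per_sample))  # 0.5
```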
-------------------------------------------------------------------------------- /Eagle/scripts/finetune-eagle-x5-yi-34b-cambrian-7m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | NAME=$1 3 | 4 | # We fine-tune this model on 256 A100 GPUs 5 | export WANDB_PROJECT="eagle" 6 | export WANDB_RUN_ID=${NAME} 7 | export WANDB_RESUME="allow" 8 | 9 | echo "MASTER_ADDR=$MASTER_ADDR" 10 | n_node=$SLURM_JOB_NUM_NODES 11 | echo "number of nodes:" $n_node 12 | echo "node rank:" $SLURM_PROCID 13 | 14 | python -m torch.distributed.run \ 15 | --nproc_per_node 8 --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ 16 | --master_addr $MASTER_ADDR --master_port 25031 \ 17 | train_mem.py \ 18 | --deepspeed ./scripts/zero3.json \ 19 | --model_name_or_path NousResearch/Nous-Hermes-2-Yi-34B \ 20 | --version yi_34b_chatml_direct \ 21 | --data_path $PATH_TO_CAMBRIAN_SFT_DATA \ 22 | --image_folder $PATH_TO_CAMBRIAN_SFT_DATA \ 23 | --vision_tower "clip-448;convnext-1024;sam-1024;det-1024;pix2struct-1024" \ 24 | --pretrain_mm_mlp_adapter $PATH_TO_PRETRAINED_PROJECTOR/mm_projector.bin \ 25 | --mm_projector_type mlp2x_gelu \ 26 | --mm_vision_select_layer -2 \ 27 | --mm_use_im_start_end False \ 28 | --mm_use_im_patch_token False \ 29 | --image_aspect_ratio pad \ 30 | --group_by_modality_length True \ 31 | --bf16 True \ 32 | --output_dir ./checkpoints/$NAME \ 33 | --num_train_epochs 1 \ 34 | --per_device_train_batch_size 4 \ 35 | --per_device_eval_batch_size 4 \ 36 | --gradient_accumulation_steps 1 \ 37 | --evaluation_strategy "no" \ 38 | --save_strategy "steps" \ 39 | --save_steps 500 \ 40 | --save_total_limit 1 \ 41 | --learning_rate 2e-5 \ 42 | --weight_decay 0. \ 43 | --warmup_ratio 0.03 \ 44 | --lr_scheduler_type "cosine" \ 45 | --logging_steps 1 \ 46 | --tf32 True \ 47 | --model_max_length 2048 \ 48 | --gradient_checkpointing True \ 49 | --dataloader_num_workers 4 \ 50 | --lazy_preprocess True \ 51 | --report_to wandb \ 52 | --run_name ${NAME} 53 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/filters/extraction.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from lmms_eval.api.filter import Filter 4 | 5 | 6 | class RegexFilter(Filter): 7 | """ """ 8 | 9 | def __init__(self, regex_pattern: str = r"#### (\-?[0-9\.\,]+)", fallback: str = "[invalid]") -> None: 10 | """ 11 | pass a string `regex` to run `re.compile(r"regex")` on. 12 | `fallback` defines the output returned if no matches for the regex are located. 13 | """ 14 | self.regex_pattern = regex_pattern 15 | self.regex = re.compile(regex_pattern) 16 | self.fallback = fallback 17 | 18 | def apply(self, resps, docs): 19 | # here, we assume we have a list, in which each element is 20 | # a list of model responses for some particular input/target pair. 21 | # so we process each of these (same input/target response sets) 22 | # independently (and keep them a list.)
23 | def filter_set(inst): 24 | filtered = [] 25 | for resp in inst: 26 | match = self.regex.search(resp) 27 | if match: 28 | match = match.group(1).strip() 29 | else: 30 | match = self.fallback 31 | filtered.append(match) 32 | return filtered 33 | 34 | # print(resps) 35 | filtered_resps = list(map(lambda x: filter_set(x), resps)) 36 | # print(filtered_resps) 37 | 38 | return filtered_resps 39 | 40 | 41 | class WhitespaceFilter(Filter): 42 | """ """ 43 | 44 | def __init__(self) -> None: 45 | pass 46 | 47 | def apply(self, resps, docs): 48 | def filter_set(inst): 49 | filtered_resp = [] 50 | for resp in inst: 51 | if resp.startswith(" "): 52 | resp = resp[1:] 53 | 54 | filtered_resp.append(resp) 55 | 56 | return filtered_resp 57 | 58 | filtered_resps = [filter_set(resp) for resp in resps] 59 | 60 | return filtered_resps 61 | -------------------------------------------------------------------------------- /Eagle2_5/streamlit_demo/sd_worker.py: -------------------------------------------------------------------------------- 1 | # This file is adopted from the InternVL project 2 | # (https://github.com/OpenGVLab/InternVL), licensed under the MIT License. 3 | # 4 | # -------------------------------------------------------- 5 | # InternVL 6 | # Copyright (c) 2023 OpenGVLab 7 | # Licensed under The MIT License 8 | # -------------------------------------------------------- 9 | 10 | from io import BytesIO 11 | 12 | import torch 13 | from diffusers import StableDiffusion3Pipeline 14 | from fastapi import FastAPI 15 | from fastapi.responses import Response 16 | from pydantic import BaseModel 17 | 18 | # Initialize pipeline 19 | pipe = StableDiffusion3Pipeline.from_pretrained('stabilityai/stable-diffusion-3-medium-diffusers', 20 | torch_dtype=torch.float16) 21 | pipe = pipe.to('cuda') 22 | 23 | # Create a FastAPI application 24 | app = FastAPI() 25 | 26 | 27 | # Define the input data model 28 | class CaptionRequest(BaseModel): 29 | caption: str 30 | 31 | 32 | # Defining API endpoints 33 | @app.post('/generate_image/') 34 | async def generate_image(request: CaptionRequest): 35 | caption = request.caption 36 | negative_prompt = 'blurry, low resolution, artifacts, unnatural, poorly drawn, bad anatomy, out of focus' 37 | image = pipe( 38 | caption, 39 | negative_prompt=negative_prompt, 40 | num_inference_steps=20, 41 | guidance_scale=7.0 42 | ).images[0] 43 | 44 | # Converts an image to a byte stream 45 | img_byte_arr = BytesIO() 46 | image.save(img_byte_arr, format='PNG') 47 | img_byte_arr = img_byte_arr.getvalue() 48 | 49 | return Response(content=img_byte_arr, media_type='image/png') 50 | 51 | 52 | # Run the Uvicorn server 53 | if __name__ == '__main__': 54 | import argparse 55 | 56 | import uvicorn 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--port', default=11005, type=int) 59 | args = parser.parse_args() 60 | 61 | uvicorn.run(app, host='0.0.0.0', port=args.port) 62 | -------------------------------------------------------------------------------- /Eagle2_5/document/1.installing.md: -------------------------------------------------------------------------------- 1 | # Installation Guide for Eagle2.5 2 | 3 | 4 | **Step 1: Create a New Conda Environment** *(If Docker not used)* 5 | 6 | Create an environment named `eagle` with Python 3.10: 7 | ```bash 8 | conda create -n eagle python=3.10 9 | conda activate eagle 10 | ``` 11 | 12 | --- 13 | 14 | **Step 2: Install PyTorch and Flash Attention** *(If Docker not used; match your CUDA)* 15 | 16 | Install PyTorch and Flash Attention: 17 
| ```bash 18 | pip install torch==2.5.0 --index-url https://download.pytorch.org/whl/cu124 19 | pip install flash-attn==2.4.2 --no-build-isolation 20 | ``` 21 | 22 | (optional) Flash-Attention 3 for Hopper 23 | ``` 24 | # Flash-Attention 3 for Hopper 25 | git clone https://github.com/Dao-AILab/flash-attention 26 | cd flash-attention/hopper 27 | python setup.py install 28 | ``` 29 | --- 30 | 31 | **Step 3: Install Eagle2.5** 32 | 33 | Clone and install: 34 | ```bash 35 | # Clone the repository 36 | git clone -b main --single-branch https://github.com/NVlabs/Eagle.git 37 | 38 | # Navigate to the working directory and install 39 | cd Eagle/Eagle2_5 40 | pip install -e . 41 | ``` 42 | 43 | --- 44 | 45 | 46 | ## 🚨 Troubleshooting Common Issues 47 | 48 | ### Transformers Installation 49 | 50 | The project requires `transformers==4.51.0` as specified in `pyproject.toml`. If you encounter issues installing this version, follow these steps: 51 | 52 | 1. Remove the transformers dependency `"transformers"==4.51.0` from `pyproject.toml` 53 | 2. Install transformers manually using the following commands: 54 | ```bash 55 | git clone https://github.com/huggingface/transformers.git 56 | cd transformers 57 | git checkout 279c2e302ae4993986d6681c5885990c55eb5972 58 | pip install -e . 59 | ``` 60 | 61 | ### 🐞 OpenCV-related Issues 62 | 63 | - **Error:** `ImportError: libGL.so.1: cannot open shared object file` 64 | 65 | Fix by installing `libgl1`: 66 | ```bash 67 | sudo apt update 68 | sudo apt install libgl1 69 | ``` 70 | 71 | ✅ **Setup complete!** You're now ready to use Eagle2.5. 72 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_projector/builder.py: -------------------------------------------------------------------------------- 1 | # This file is derived from the LLaVA project 2 | # (https://github.com/haotian-liu/LLaVA), which is licensed under 3 | # the Apache License, Version 2.0. 
4 | # 5 | # -------------------------------------------------------- 6 | # LLaVA 7 | # Copyright (c) 2023 Haotian Liu 8 | # Licensed under the Apache License, Version 2.0 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | import torch.nn as nn 13 | import re 14 | 15 | class IdentityMap(nn.Module): 16 | def __init__(self): 17 | super().__init__() 18 | 19 | def forward(self, x, *args, **kwargs): 20 | return x 21 | 22 | @property 23 | def config(self): 24 | return {"mm_projector_type": 'identity'} 25 | 26 | 27 | class SimpleResBlock(nn.Module): 28 | def __init__(self, channels): 29 | super().__init__() 30 | self.pre_norm = nn.LayerNorm(channels) 31 | 32 | self.proj = nn.Sequential( 33 | nn.Linear(channels, channels), 34 | nn.GELU(), 35 | nn.Linear(channels, channels) 36 | ) 37 | def forward(self, x): 38 | x = self.pre_norm(x) 39 | return x + self.proj(x) 40 | 41 | 42 | def build_vision_projector(config, delay_load=False, fpn_input_dim=[], **kwargs): 43 | projector_type = getattr(config, 'mm_projector_type', 'linear') 44 | 45 | if projector_type == 'linear': 46 | return nn.Linear(config.mm_hidden_size, config.hidden_size) 47 | 48 | mlp_gelu_match = re.match(r'^mlp(\d+)x_gelu$', projector_type) 49 | if mlp_gelu_match: 50 | mlp_depth = int(mlp_gelu_match.group(1)) 51 | modules = [nn.Linear(config.mm_hidden_size, config.hidden_size)] 52 | for _ in range(1, mlp_depth): 53 | modules.append(nn.GELU()) 54 | modules.append(nn.Linear(config.hidden_size, config.hidden_size)) 55 | return nn.Sequential(*modules) 56 | 57 | if projector_type == 'identity': 58 | return IdentityMap() 59 | 60 | raise ValueError(f'Unknown projector type: {projector_type}') 61 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def seed_doc_to_visual(doc): 5 | return [image.convert("RGB") for image in doc["image"]] 6 | 7 | 8 | def seed_doc_to_text(doc): 9 | question = doc["question"] 10 | question += "\n" + f"A. {doc['choice_a']}\n" 11 | question += f"B. {doc['choice_b']}\n" 12 | question += f"C. {doc['choice_c']}\n" 13 | question += f"D. {doc['choice_d']}" 14 | return f"{question}\nAnswer with the option's letter from the given choices directly." 
15 | 16 | 17 | def seed_process_result(doc, result): 18 | pred = result[0].strip() 19 | if len(pred) > 1: 20 | pred = pred[0] 21 | answer = doc["answer"] 22 | data_type = doc["data_type"] 23 | 24 | return {f"seed_{data_type}": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}, f"seed_all": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}} 25 | 26 | 27 | def seed_aggregation_result(results): 28 | total_count = 0 29 | total_correct = 0 30 | for result in results: 31 | if result["pred"] == result["answer"]: 32 | total_correct += 1 33 | total_count += 1 34 | return total_correct / total_count 35 | 36 | 37 | def seed_aggregation_result_all(results): 38 | score = seed_aggregation_result(results) 39 | stored_results = [] 40 | for result in results: 41 | stored_results.append({"question_id": result["question_id"], "prediction": result["pred"]}) 42 | with open("./seed_submission.json", "w") as f: 43 | json.dump(stored_results, f, indent=4) 44 | print("Storing files for seed_submission ...") 45 | 46 | return score 47 | 48 | 49 | def seed_doc_to_text_mc(doc): 50 | question = doc["question"] 51 | return f"{question} Answer :" 52 | 53 | 54 | def seed_doc_to_choice(doc): 55 | return [doc["choice_a"], doc["choice_b"], doc["choice_c"], doc["choice_d"]] 56 | 57 | 58 | def seed_doc_to_mc_target(doc): 59 | answer2choice = {"A": "choice_a", "B": "choice_b", "C": "choice_c", "D": "choice_d"} 60 | return doc[answer2choice[doc["answer"]]] 61 | -------------------------------------------------------------------------------- /Eagle2_5/document/3.training.md: -------------------------------------------------------------------------------- 1 | # 🚀 Starting Eagle Training 2 | 3 | ### ✅ Prerequisites 4 | 5 | Make sure you have completed these steps: 6 | 7 | - [1. Setting Environment Variables](./1.setting_env_file.md) 8 | - [2. Installing Eagle Environment](./1.installing.md) 9 | - [3. Preparing Playground Data](./2.preparing_playground.md) 10 | 11 | ### 🟢 Launching Stage-1 Training 12 | Typically, you only need to train on top of our pre-trained model. If you want to start from a base LLM and a Vision Encoder, 13 | please refer to this script: [stage-1 training](./stage_1_training.md) 14 | 15 | ### 🟢 Launching Finetuning 16 | 17 | Run the following command to start training: 18 | 19 | ```bash 20 | GPUS=8 bash shell/train_stage2.sh 1 work_dirs/eagle2.5_debug 21 | ``` 22 | 23 | If everything goes smoothly, your first step log should look like this: 24 | 25 | ```bash 26 | [2025-04-03 06:56:30,930] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | optimizer_step: 11.44 27 | [2025-04-03 06:56:30,931] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | fwd_microstep: 1148.25 | bwd_microstep: 7335.70 | bwd_inner_microstep: 1056.00 | bwd_allreduce_microstep: 6279.59 | step_microstep: 71.91 28 | [2025-04-03 06:56:30,931] [INFO] [logging.py:128:log_dist] [Rank 0] time (ms) | fwd: 1148.25 | bwd: 7335.70 | bwd_inner: 1056.03 | bwd_allreduce: 6279.59 | step: 71.91 29 | [Step 1 | Rank 0 / GPU 0] Memory: 18552.12 MB, Temperature: 32°C, Power: 121.13 W, 30 | {'loss': 0.9605, 'grad_norm': 1.3685229204449167, 'learning_rate': 2.5e-06, 'epoch': 0.0} 31 | 0%|▌ | 1/248 [00:09<38:40, 9.40s/it] 32 | ``` 33 | 34 | ### Script Parameter Explanation 35 | 36 | We provide detailed documentation explaining all parameters and variables used in our training launch scripts. This includes environment variables, calculated variables, torchrun launcher arguments, and training script parameters.
37 | 38 | For comprehensive information, please refer to our [Script Arguments Documentation](./explain_script_arguments.md). 39 | 40 | ### Notes 41 | If you use internal job watchers to auto-cancel failing runs, ensure your training scripts do not print ignorable exceptions with a full traceback; otherwise watchers may incorrectly terminate jobs. 42 | 43 | 44 | 🎉 **Happy Training!** -------------------------------------------------------------------------------- /Eagle/lmms_eval/api/filter.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | from lmms_eval.api.instance import Instance 5 | from datasets import Dataset 6 | 7 | 8 | class Filter: 9 | """ 10 | Filter classes operate on a per-task level. 11 | They take all model outputs (`instance.resps` for all `task.instances`) 12 | across all instances of a task, and perform operations. 13 | In a single run, one can configure any number of separate filters or lists of filters. 14 | 15 | """ 16 | 17 | def __init__(self, *args, **kwargs) -> None: 18 | """ 19 | Can define custom behavior here, if an individual instantiation of a Filter class should have state. 20 | """ 21 | 22 | def apply(self, resps, docs): 23 | """ 24 | Defines the operation to perform on a list of the `inst.resps` properties of `Instance` objects. 25 | Should return the list of (filtered) response lists *in the same order as they were input*, e.g. 26 | if passed in [<inst.resps for instance 0>, <inst.resps for instance 1>] it should return 27 | [<filtered resps for instance 0>, <filtered resps for instance 1>] 28 | """ 29 | return resps 30 | 31 | 32 | @dataclass 33 | class FilterEnsemble: 34 | """ 35 | FilterEnsemble creates a pipeline applying multiple filters. 36 | Its intended usage is to stack multiple post-processing steps in order. 37 | `task.apply_filters` should use a list of FilterEnsemble classes that it stores, to apply each 38 | pipeline separately. 39 | """ 40 | 41 | name: str 42 | filters: List[Filter] 43 | 44 | def apply(self, instances: List[Instance], docs: List[Dataset]) -> None: 45 | resps = [inst.resps for inst in instances] # operate just on the model responses 46 | for f in self.filters: 47 | # apply filters in sequence 48 | resps = f.apply(resps, docs) 49 | 50 | # add the end results after filtering to filtered_requests of their respective source instances. 51 | # has key `self.name`: each FilterEnsemble applied in a given run should use a different name. 52 | for inst, resp in zip(instances, resps): 53 | inst.filtered_resps[self.name] = resp 54 | -------------------------------------------------------------------------------- /Eagle/eagle/model/multimodal_encoder/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | # 17 | # Portions of this file are derived from the LLaVA project 18 | # (https://github.com/haotian-liu/LLaVA), licensed under the 19 | # Apache License, Version 2.0. 20 | # 21 | # Modifications © 2024 NVIDIA CORPORATION & AFFILIATES, licensed under 22 | # the Apache License, Version 2.0. 23 | # 24 | # -------------------------------------------------------- 25 | # LLaVA 26 | # Copyright (c) 2023 Haotian Liu 27 | # Licensed under the Apache License, Version 2.0 28 | # -------------------------------------------------------- 29 | 30 | import os 31 | from .clip_encoder import CLIPVisionTower 32 | from .multi_backbone_channel_concatenation_encoder import MultiBackboneChannelConcatenationVisionTower 33 | 34 | def build_vision_tower(vision_tower_cfg, **kwargs): 35 | vision_tower = getattr(vision_tower_cfg, 'mm_vision_tower', getattr(vision_tower_cfg, 'vision_tower', None)) 36 | 37 | if "clip" in vision_tower and vision_tower.startswith("openai"): 38 | is_absolute_path_exists = os.path.exists(vision_tower) 39 | if is_absolute_path_exists or vision_tower.startswith("openai") or vision_tower.startswith("laion") or "ShareGPT4V" in vision_tower: 40 | return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs) 41 | raise ValueError(f'Unknown vision tower: {vision_tower}') 42 | 43 | elif ";" in vision_tower: 44 | return MultiBackboneChannelConcatenationVisionTower(vision_tower, args=vision_tower_cfg) 45 | 46 | raise ValueError(f'Unknown vision tower: {vision_tower}') 47 | -------------------------------------------------------------------------------- /Eagle2_5/eaglevl/sp_utils/attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import math 3 | 4 | import torch.distributed as dist 5 | 6 | from .comm import (all_to_all, gather_forward_split_backward, 7 | split_forward_gather_backward) 8 | from .globals import get_pg_manager 9 | 10 | 11 | def pre_process_for_sequence_parallel_attn(query_states, 12 | key_states, 13 | value_states, 14 | scatter_dim=2, 15 | gather_dim=1): 16 | b, s_div_sp, h, d = query_states.shape 17 | sp = get_pg_manager().ulysses_sequence_parallel_world_size 18 | 19 | 20 | # (b, s_div_sp, insp*h, d/insp) -> (b, s, insp*h/sp, d/insp) 21 | sequence_parallel_group = get_pg_manager().ulysses_sequence_parallel_group 22 | query_states = all_to_all( 23 | query_states, 24 | sequence_parallel_group, 25 | scatter_dim=scatter_dim, 26 | gather_dim=gather_dim) 27 | key_states = all_to_all( 28 | key_states, 29 | sequence_parallel_group, 30 | scatter_dim=scatter_dim, 31 | gather_dim=gather_dim) 32 | value_states = all_to_all( 33 | value_states, 34 | sequence_parallel_group, 35 | scatter_dim=scatter_dim, 36 | gather_dim=gather_dim) 37 | 38 | 39 | return query_states, key_states, value_states 40 | 41 | 42 | def post_process_for_sequence_parallel_attn(attn_output, 43 | scatter_dim=1, 44 | gather_dim=2): 45 | sp = get_pg_manager().ulysses_sequence_parallel_world_size 46 | # insp = get_inner_sequence_parallel_world_size() 47 | b, s, h_mul_insp_div_sp, d = attn_output.shape 48 | h = h_mul_insp_div_sp * sp 49 | s_div_sp = s // sp 50 | 51 | 52 | # (b, s, insp*h/sp, d/insp) -> (b, s_div_sp, insp*h, d/insp) 53 | sequence_parallel_group = get_pg_manager().ulysses_sequence_parallel_group 54 | output = all_to_all( 55 | attn_output, 56 | sequence_parallel_group, 57 | scatter_dim=scatter_dim, 58 | gather_dim=gather_dim) 59 | 60 | return output 61 | 62 | -------------------------------------------------------------------------------- /Eagle/eagle/model/consolidate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 NVIDIA CORPORATION & AFFILIATES 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | # 17 | # Portions of this file are derived from the LLaVA project 18 | # (https://github.com/haotian-liu/LLaVA), licensed under the 19 | # Apache License, Version 2.0. 20 | # 21 | # Modifications © 2024 NVIDIA CORPORATION & AFFILIATES, licensed under 22 | # the Apache License, Version 2.0. 
23 | # 24 | # -------------------------------------------------------- 25 | # LLaVA 26 | # Copyright (c) 2023 Haotian Liu 27 | # Licensed under the Apache License, Version 2.0 28 | # -------------------------------------------------------- 29 | 30 | 31 | """ 32 | Usage: 33 | python3 -m eagle.model.consolidate --src ~/model_weights/eagle-7b --dst ~/model_weights/eagle-7b_consolidate 34 | """ 35 | import argparse 36 | 37 | import torch 38 | from transformers import AutoTokenizer, AutoModelForCausalLM 39 | from eagle.model import * 40 | from eagle.model.utils import auto_upgrade 41 | 42 | 43 | def consolidate_ckpt(src_path, dst_path): 44 | print("Loading model") 45 | auto_upgrade(src_path) 46 | src_model = AutoModelForCausalLM.from_pretrained(src_path, torch_dtype=torch.float16, low_cpu_mem_usage=True) 47 | src_tokenizer = AutoTokenizer.from_pretrained(src_path, use_fast=False) 48 | src_model.save_pretrained(dst_path) 49 | src_tokenizer.save_pretrained(dst_path) 50 | 51 | 52 | if __name__ == "__main__": 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument("--src", type=str, required=True) 55 | parser.add_argument("--dst", type=str, required=True) 56 | 57 | args = parser.parse_args() 58 | 59 | consolidate_ckpt(args.src, args.dst) 60 | -------------------------------------------------------------------------------- /Eagle/scripts/convert_vqav2_for_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | 5 | # for debug 6 | import sys 7 | sys.path.append(os.getcwd()) 8 | 9 | from llava.eval.m4c_evaluator import EvalAIAnswerProcessor 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--src', type=str, required=True) 15 | parser.add_argument('--save_path', type=str, required=True) 16 | parser.add_argument('--dir', type=str, default="./playground/data/eval/vqav2") 17 | parser.add_argument('--ckpt', type=str, required=True) 18 | parser.add_argument('--split', type=str, required=True) 19 | return parser.parse_args() 20 | 21 | 22 | if __name__ == '__main__': 23 | 24 | args = parse_args() 25 | 26 | # src = os.path.join(args.dir, 'answers', args.split, args.ckpt, 'merge.jsonl') 27 | src = args.src 28 | test_split = os.path.join(args.dir, 'llava_vqav2_mscoco_test2015.jsonl') 29 | # dst = os.path.join(args.dir, 'answers_upload', args.split, f'vqav2_test_{args.ckpt}.json') 30 | dst = args.save_path 31 | os.makedirs(os.path.dirname(dst), exist_ok=True) 32 | 33 | results = [] 34 | error_line = 0 35 | for line_idx, line in enumerate(open(src)): 36 | try: 37 | results.append(json.loads(line)) 38 | except json.JSONDecodeError: 39 | error_line += 1 40 | 41 | results = {x['question_id']: x['text'] for x in results} 42 | test_split = [json.loads(line) for line in open(test_split)] 43 | split_ids = set([x['question_id'] for x in test_split]) 44 | 45 | print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') 46 | 47 | all_answers = [] 48 | 49 | answer_processor = EvalAIAnswerProcessor() 50 | 51 | for x in test_split: 52 | if x['question_id'] not in results: 53 | all_answers.append({ 54 | 'question_id': x['question_id'], 55 | 'answer': '' 56 | }) 57 | else: 58 | all_answers.append({ 59 | 'question_id': x['question_id'], 60 | 'answer': answer_processor(results[x['question_id']]) 61 | }) 62 | 63 | with open(dst, 'w') as f: 64 | json.dump(all_answers, f) 65 | 66 | print(f"successfully saved results to {dst}") 67 |
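As a rough sketch of the record shapes this converter assumes (the ids and answers below are made up): each line of the `--src` file is a JSON object carrying `question_id` and the model prediction under `text`, and the file written to `--save_path` is a single JSON list of `{question_id, answer}` entries covering every question in the official test split.

```python
# Illustrative only: the ids and answers below are placeholders, not real VQAv2 records.
import json

# Each line of the --src file is one prediction record:
src_lines = [
    {"question_id": 1001, "text": "two"},
    {"question_id": 1002, "text": "a red bus"},
]
with open("merge_example.jsonl", "w") as f:
    for rec in src_lines:
        f.write(json.dumps(rec) + "\n")

# The converter writes a single JSON list to --save_path, shaped like:
submission = [{"question_id": rec["question_id"], "answer": rec["text"]} for rec in src_lines]
print(json.dumps(submission, indent=2))
```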
-------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/iconqa/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | def options_to_str(options_prompt): 6 | option_prompt_str = "" 7 | for i, option in enumerate(options_prompt): 8 | option_choice = chr(ord("A") + i) 9 | option_prompt_str += f"{option_choice}. {option}\n" 10 | 11 | option_prompt_str = option_prompt_str.rstrip("\n") 12 | return option_prompt_str 13 | 14 | 15 | def doc_to_visual(doc): 16 | image_list = [] 17 | if "query_image" in doc: 18 | image_list.append(doc["query_image"].convert("RGB")) 19 | for i in range(5): 20 | id = f"choice_image_{i}" 21 | if id in doc and doc[id] is not None: 22 | image_list.append(doc[id].convert("RGB")) 23 | assert len(image_list) < 6, "Maximum 5 images allowed for ICON-QA" 24 | return image_list 25 | 26 | 27 | def doc_to_text(doc, model_specific_prompt_kwargs): 28 | question = doc["question"] 29 | ques_type = doc["ques_type"] 30 | options_prompt = [] 31 | 32 | if ques_type == "choose_img": 33 | options_prompt.append("The first image.") 34 | options_prompt.append("The second image.") 35 | 36 | options_str = options_to_str(options_prompt) 37 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['options_statement'].format(question=question, options=options_str)}" 38 | 39 | elif ques_type == "choose_txt": 40 | choices = doc["choices"].split(",") 41 | for i, choice in enumerate(choices): 42 | options_prompt.append(f"{choice}") 43 | 44 | options_str = options_to_str(options_prompt) 45 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['options_statement'].format(question=question, options=options_str)}" 46 | 47 | elif ques_type == "fill_in_blank": 48 | full_prompt = f"{model_specific_prompt_kwargs['pre_prompt']}{model_specific_prompt_kwargs['statement']}{model_specific_prompt_kwargs['freeform_statement'].format(question=question)}" 49 | 50 | return full_prompt 51 | 52 | 53 | def test_process_results(doc, results): 54 | pred = results[0] 55 | questionId = doc["question_id"] 56 | answer = doc["answer"] 57 | return {"anls": {"questionId": int(questionId), "answer": answer, "pred_answer": pred}} 58 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/seedbench_2/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def seed_doc_to_visual(doc): 5 | return [image.convert("RGB") for image in doc["image"]] 6 | 7 | 8 | def parse_choice_img(choice: str, img_token: str): 9 | if "jpg" in choice or "png" in choice: 10 | return img_token 11 | return choice 12 | 13 | 14 | def seed_doc_to_text(doc, model_specific_kwargs=None): 15 | question = doc["question"] 16 | question.replace("", model_specific_kwargs["img_token"]) 17 | question += "\n" + f"A. {parse_choice_img(doc['choice_a'], model_specific_kwargs['img_token'])}\n" 18 | question += f"B. {parse_choice_img(doc['choice_b'], model_specific_kwargs['img_token'])}\n" 19 | question += f"C. {parse_choice_img(doc['choice_c'], model_specific_kwargs['img_token'])}\n" 20 | question += f"D. 
{parse_choice_img(doc['choice_d'], model_specific_kwargs['img_token'])}" 21 | if doc["data_type"] == "Image Generation": 22 | num_img_in_question = len(doc["data_id"]) - 4 23 | prepend_tokens = [model_specific_kwargs["img_token"]] * num_img_in_question 24 | question = " ".join(prepend_tokens) + "\n" + question 25 | return f"{question}\n{model_specific_kwargs['post_prompt']}" 26 | 27 | 28 | def seed_process_result(doc, result): 29 | pred = result[0].strip() 30 | if len(pred) > 1: 31 | pred = pred[0] 32 | answer = doc["answer"] 33 | data_type = doc["data_type"].split(" ") 34 | data_type = "_".join(data_type) 35 | 36 | return {f"seed_{data_type}": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}, f"seed_all": {"pred": pred, "answer": answer, "question_id": doc["question_id"]}} 37 | 38 | 39 | def seed_aggregation_result(results): 40 | total_count = 0 41 | total_correct = 0 42 | for result in results: 43 | if result["pred"] == result["answer"]: 44 | total_correct += 1 45 | total_count += 1 46 | return total_correct / total_count if total_count != 0 else 0 47 | 48 | 49 | def seed_aggregation_result_all(results): 50 | score = seed_aggregation_result(results) 51 | stored_results = [] 52 | for result in results: 53 | stored_results.append({"question_id": result["question_id"], "prediction": result["pred"]}) 54 | with open("./seed_submission.json", "w") as f: 55 | json.dump(stored_results, f, indent=4) 56 | print("Storing files for seed_submission ...") 57 | 58 | return score 59 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/ok_vqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 11 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def ok_vqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def ok_vqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 
23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | if gtAcc: 38 | accuracy = statistics.mean(gtAcc) 39 | else: 40 | accuracy = 0 41 | 42 | return { 43 | "exact_match": accuracy, 44 | "submission": { 45 | "image": f"{doc['question_id']}.jpg", 46 | "answer": resAns, 47 | }, 48 | } 49 | 50 | 51 | def ok_vqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 52 | question = doc["question"] 53 | if model_specific_prompt_kwargs is None: 54 | model_specific_prompt_kwargs = {} 55 | pre_prompt = "" 56 | post_prompt = "" 57 | if "pre_prompt" in model_specific_prompt_kwargs: 58 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 59 | if "post_prompt" in model_specific_prompt_kwargs: 60 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 61 | return f"{pre_prompt}{question}{post_prompt}" 62 | 63 | 64 | def ok_vqa_aggreate_submissions(results, args): 65 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 66 | file = f"ok_vqa-test-submission-{now_date_time}.json" 67 | path = generate_submission_file(file, args) 68 | with open(path, "w") as f: 69 | json.dump(results, f) 70 | print(f"Submission file saved to {path}") 71 | -------------------------------------------------------------------------------- /Eagle2_5/README.md: -------------------------------------------------------------------------------- 1 | # 🦅 Eagle 2.5 2 | 3 | Eagle 2.5 is a multimodal large model (image/video × text). This repository provides the end-to-end guidance and scripts for the environment setup, data preparation, training, and inference of the Eagle VLM. 4 | 5 | --- 6 | 7 | ## 📚 Quick Start (Onboarding) 8 | 9 | Recommended order: 10 | 11 | 1) Set environment variables → 2) Install → 3) Prepare data → 4) Train → 5) Demo → 6) Inference 12 | 13 | - Onboarding overview: see `./document/0.onboarding.md` 14 | 15 | --- 16 | 17 | ## ⚙️ Installation & Environment 18 | 19 | - Detailed steps and dependencies: `./document/1.installing.md` 20 | - Conda environment (Python 3.10) 21 | - PyTorch and FlashAttention (match your CUDA) 22 | - Install this repo with `pip install -e .` 23 | - Troubleshooting notes (specific Transformers version, OpenCV dependencies, etc.) 
24 | 25 | --- 26 | 27 | ## 📂 Data Preparation (Playground) 28 | 29 | - Directory structure and JSONL/LMDB examples: `./document/2.preparing_playground.md` 30 | - `playground/sft_recipe` (data recipe) 31 | - `playground/sft_jsonl` and `playground/sft_data` (annotations and raw data) 32 | - Example parquet→LMDB conversion scripts are not included in this repo 33 | - Use `shell/prepare.sh` to normalize and generate `.prepare.json` (internal `submit_prepare_job.sh` is not included) 34 | - LMDB reading example and tips: `./document/how_to_use_lmdb_to_read_images.md` 35 | 36 | --- 37 | 38 | ## 💪 Training (Stage-2 / Finetuning) 39 | 40 | - Full training entry points and multinode/multigpu options: `./document/3.training.md` 41 | - Single-node example: `GPUS=8 bash shell/train_stage2.sh 1 work_dirs/eagle2.5_debug` 42 | - Multi-node example (srun/internal submit_job): `PARTITION=xxx GPUS=16 bash shell/train_stage2.sh 2 work_dirs/eagle2.5_multinode` 43 | 44 | --- 45 | 46 | ## ✨ Launching Streamlit Demo 47 | 48 | - Interactive testing of the VLM with UI. Refer to document for more details: `./document/4.streamlit_demo.md` 49 | 50 | --- 51 | 52 | ## 🔮 Inference 53 | 54 | - End-to-end usage and multimodal examples (single/multiple images, single/multiple videos, streaming, batch): `./document/5.inference.md` 55 | - Load with `transformers` `AutoModel`/`AutoProcessor`: `"nvidia/Eagle-2.5-8B"` 56 | - Recommended `torch_dtype=torch.bfloat16`; run `model.generate(...)` on GPU 57 | 58 | --- 59 | 60 | ## License 61 | 62 | - See `LICENSE` and `LICENSE_MODEL` at the repository root. 63 | 64 | --- 65 | 66 | For detailed parameter explanations and launcher script notes, see: `./document/explain_script_arguments.md`. 67 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/vizwiz_vqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 11 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def vizwiz_vqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def vizwiz_vqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 
23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | if gtAcc: 38 | accuracy = statistics.mean(gtAcc) 39 | else: 40 | accuracy = 0 41 | 42 | return { 43 | "exact_match": accuracy, 44 | "submission": { 45 | "image": f"{doc['question_id']}.jpg", 46 | "answer": resAns, 47 | }, 48 | } 49 | 50 | 51 | def vizwiz_vqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 52 | if model_specific_prompt_kwargs is None: 53 | model_specific_prompt_kwargs = {} 54 | pre_prompt = "" 55 | post_prompt = "" 56 | if "pre_prompt" in model_specific_prompt_kwargs: 57 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 58 | if "post_prompt" in model_specific_prompt_kwargs: 59 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 60 | text = f"{pre_prompt}{doc['question'].capitalize()}{post_prompt}" 61 | return text 62 | 63 | 64 | def vizwiz_vqa_aggreate_submissions(results, args): 65 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 66 | submission_file_name = f"vizwiz_vqa-test-submission-{now_date_time}.json" 67 | path = generate_submission_file(submission_file_name, args) 68 | with open(path, "w") as f: 69 | json.dump(results, f) 70 | print(f"Submission file saved to {path}") 71 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/olympiadbench/cn_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import datetime 4 | from lmms_eval.tasks.olympiadbench.olympiadbench_evals import OlympiadBenchEvaluator 5 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 6 | 7 | import logging 8 | eval_logger = logging.getLogger("lmms-eval") 9 | dir_name = os.path.dirname(os.path.abspath(__file__)) 10 | 11 | olympiadbench_evaluator = OlympiadBenchEvaluator() 12 | 13 | def olympiadbench_doc_to_visual(doc): 14 | return [image.convert("RGB") for image in doc["images"]] 15 | 16 | def olympiadbench_doc_to_text(doc): 17 | question = doc["question"] 18 | subject = doc["subfield"] 19 | mul_ans = doc["is_multiple_answer"] 20 | if mul_ans is None: 21 | mul_ans = False 22 | ans_type = doc["answer_type"] 23 | if ans_type == "Need_human_evaluate": 24 | ans_type = "proof based" 25 | 26 | pre_prompt = f"以下是中国{subject}竞赛中的解答题。\n" 27 | 28 | post_prompt = "" 29 | if not mul_ans: 30 | post_prompt += f"答案类型为{ans_type}。\n" 31 | else: 32 | post_prompt += f"题目有多个答案,答案类型均为{ans_type}。\n" 33 | post_prompt += "请根据题目的要求和所提供的信息计算得出答案。解答过程和结果中使用的变量和公式请使用LaTeX格式表示。请在最后以" 34 | if not mul_ans: 35 | post_prompt += '"所以最终答案是\\boxed{答案}。"\n' 36 | else: 37 | post_prompt += '"所以最终答案是\\boxed{用英⽂逗号连接的多个答案}。"\n' 38 | 39 | final_question = pre_prompt + question + '\n' + post_prompt 40 | return final_question 41 | 42 | def olympiadbench_process_results(doc, results): 43 | precision = doc["error"] 44 | is_proving = "TP" in doc["source"] 45 | if precision is None: 46 | precision = 0 47 | prediction = results[0].strip() 48 | 49 | if is_proving: 50 | return { 51 | "submission": prediction 52 | } 53 | else: 54 | prediction = 
prediction.split("所以最终答案是")[-1] 55 | prediction = prediction.replace('"', "").replace("\n", "").replace(" ", "").strip(".").strip("。") 56 | accuracy = olympiadbench_evaluator.judge(prediction, doc["final_answer"][0], precision) 57 | accuracy = int(accuracy) 58 | return { 59 | "exact_match": accuracy 60 | } 61 | 62 | def olympiadbench_aggregate_results(results, args): 63 | now_date_time = datetime.datetime.now().strftime("%Y-%m%d-%H%M-%S") 64 | submission_file_name = f"olympiadbench-test-cn-submission-{now_date_time}.json" 65 | path = generate_submission_file(submission_file_name, args) 66 | with open(path, "w") as f: 67 | json.dump(results, f, ensure_ascii=False) 68 | print(f"Submission file saved to {path}") 69 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/chartqa/utils.py: -------------------------------------------------------------------------------- 1 | def chartqa_doc_to_visual(doc): 2 | return [doc["image"].convert("RGB")] 3 | 4 | 5 | def chartqa_doc_to_text(doc, model_specific_prompt_kwargs): 6 | question = doc["question"] 7 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 8 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 9 | return f"{pre_prompt}{question}{post_prompt}" 10 | 11 | 12 | def chartqa_process_results(doc, results): 13 | pred = results[0] 14 | type = doc["type"] 15 | score = relaxed_correctness(pred, doc["answer"]) 16 | score = 1.0 if score else 0.0 17 | return_dict = {"relaxed_overall": score} 18 | if type == "human_test": 19 | return_dict["relaxed_human_split"] = score 20 | else: 21 | return_dict["relaxed_augmented_split"] = score 22 | return return_dict 23 | 24 | 25 | def relaxed_correctness(prediction, target, max_relative_change: float = 0.05) -> bool: 26 | """Calculates relaxed correctness. 27 | 28 | The correctness tolerates certain error ratio defined by max_relative_change. 29 | See https://arxiv.org/pdf/2203.10244.pdf, end of section 5.1: 30 | “Following Methani et al. (2020), we use a relaxed accuracy measure for the 31 | numeric answers to allow a minor inaccuracy that may result from the automatic 32 | data extraction process. We consider an answer to be correct if it is within 33 | 5% of the gold answer. For non-numeric answers, we still need an exact match 34 | to consider an answer to be correct.” 35 | 36 | This function is taken from https://github.com/QwenLM/Qwen-VL/blob/34b4c0ee7b07726371b960911f249fe61b362ca3/eval_mm/evaluate_vqa.py#L113 37 | Args: 38 | target: Target string. 39 | prediction: Predicted string. 40 | max_relative_change: Maximum relative change. 41 | 42 | Returns: 43 | Whether the prediction was correct given the specified tolerance. 44 | """ 45 | 46 | def _to_float(text: str): 47 | try: 48 | if text.endswith("%"): 49 | # Convert percentages to floats.
50 | return float(text.rstrip("%")) / 100.0 51 | else: 52 | return float(text) 53 | except ValueError: 54 | return None 55 | 56 | prediction_float = _to_float(prediction) 57 | target_float = _to_float(target) 58 | if prediction_float is not None and target_float: 59 | relative_change = abs(prediction_float - target_float) / abs(target_float) 60 | return relative_change <= max_relative_change 61 | else: 62 | return prediction.lower() == target.lower() 63 | -------------------------------------------------------------------------------- /Eagle/lmms_eval/tasks/textvqa/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import json 4 | import yaml 5 | import pathlib 6 | import logging 7 | import datetime 8 | import statistics 9 | 10 | from lmms_eval.tasks._task_utils.vqa_eval_metric import EvalAIAnswerProcessor 11 | from lmms_eval.tasks._task_utils.file_utils import generate_submission_file 12 | 13 | eval_logger = logging.getLogger("lmms-eval") 14 | 15 | 16 | def textvqa_doc_to_visual(doc): 17 | return [doc["image"].convert("RGB")] 18 | 19 | 20 | def textvqa_process_results(doc, result): 21 | eval_ai_processor = EvalAIAnswerProcessor() 22 | assert len(result) == 1, f"The result should be a list of length 1, but got {len(result)}." 23 | resAns = eval_ai_processor(result[0]) 24 | accuracy = 0 25 | 26 | if "answers" in doc and doc["answers"] is not None: 27 | gtAcc = [] 28 | 29 | for i in range(len(doc["answers"])): 30 | doc["answers"][i] = eval_ai_processor(doc["answers"][i]) 31 | 32 | for i in range(len(doc["answers"])): 33 | otherGTAns = [doc["answers"][j] for j in range(len(doc["answers"])) if i != j] 34 | matchingAns = [item for item in otherGTAns if item == resAns] 35 | acc = min(1, float(len(matchingAns)) / 3) 36 | gtAcc.append(acc) 37 | accuracy = statistics.mean(gtAcc) 38 | 39 | return { 40 | "exact_match": accuracy, 41 | "submission": { 42 | "question_id": doc["question_id"], 43 | "answer": resAns, 44 | }, 45 | } 46 | 47 | 48 | def textvqa_doc_to_text(doc, model_specific_prompt_kwargs=None): 49 | pre_prompt = "" 50 | post_prompt = "" 51 | ocr_ref = "" 52 | if model_specific_prompt_kwargs: 53 | if "pre_prompt" in model_specific_prompt_kwargs: 54 | pre_prompt = model_specific_prompt_kwargs["pre_prompt"] 55 | if "post_prompt" in model_specific_prompt_kwargs: 56 | post_prompt = model_specific_prompt_kwargs["post_prompt"] 57 | if "ocr" in model_specific_prompt_kwargs and model_specific_prompt_kwargs["ocr"]: 58 | ocr_ref = f"\nReference OCR token: {', '.join(doc['ocr_tokens'])}" 59 | return f"{pre_prompt}{doc['question'].capitalize()}{ocr_ref}{post_prompt}" 60 | 61 | 62 | def textvqa_aggreate_submissions(results, args): 63 | now_date_time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 64 | path = generate_submission_file(f"textvqa_submission_{now_date_time}.json", args) 65 | with open(path, "w") as f: 66 | json.dump(results, f) 67 | # print(f"Submission file saved to {path}") 68 | eval_logger.info(f"Submission file saved to {path}") 69 | --------------------------------------------------------------------------------
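The `min(1, matches / 3)` scoring in `textvqa_process_results` (and in the ok_vqa and vizwiz_vqa variants earlier) is the standard VQA soft accuracy: each annotator's answer is left out in turn, the prediction is matched against the remaining answers, and the per-annotator scores are averaged. A small self-contained example with made-up annotator answers:

```python
# Worked example of the VQA soft-accuracy formula; the annotator answers are made up.
import statistics

def vqa_soft_accuracy(prediction, answers):
    per_annotator = []
    for i in range(len(answers)):
        others = [answers[j] for j in range(len(answers)) if j != i]
        matches = sum(a == prediction for a in others)
        per_annotator.append(min(1.0, matches / 3))
    return statistics.mean(per_annotator)

# 2 of 10 annotators answered "blue": leave-one-out gives 1/3 twice and 2/3 eight times.
answers = ["blue"] * 2 + ["navy"] * 8
print(round(vqa_soft_accuracy("blue", answers), 4))  # 0.6
```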